/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1272 - (show annotations)
Thu Mar 7 11:30:01 2013 UTC (6 years, 9 months ago) by zherczeg
File MIME type: text/plain
File size: 282026 byte(s)
Error occurred while calculating annotation data.
(*PRUNE) is now supported by the JIT compiler.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* Allocate memory for the regex stack on the real machine stack.
75 Fast, but limited size. */
76 #define MACHINE_STACK_SIZE 32768
77
78 /* Growth rate for stack allocated by the OS. Should be a multiple
79 of the page size. */
80 #define STACK_GROWTH_RATE 8192
81
82 /* Enable to check that the allocation could destroy temporaries. */
83 #if defined SLJIT_DEBUG && SLJIT_DEBUG
84 #define DESTROY_REGISTERS 1
85 #endif
86
87 /*
88 Short summary about the backtracking mechanism employed by the jit code generator:
89
90 The code generator follows the recursive nature of the PERL compatible regular
91 expressions. The basic blocks of regular expressions are condition checkers
92 which execute different commands depending on the result of the condition check.
93 The relationship between the operators can be horizontal (concatenation) and
94 vertical (sub-expression) (See struct backtrack_common for more details).
95
96 'ab' - 'a' and 'b' regexps are concatenated
97 'a+' - 'a' is the sub-expression of the '+' operator
98
99 The condition checkers are boolean (true/false) checkers. Machine code is generated
100 for the checker itself and for the actions depending on the result of the checker.
101 The 'true' case is called the matching path (expected path), and the other is called
102 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
103 branches on the matching path.
104
105 Greedy star operator (*) :
106 Matching path: match happens.
107 Backtrack path: match failed.
108 Non-greedy star operator (*?) :
109 Matching path: no need to perform a match.
110 Backtrack path: match is required.
111
112 The following example shows how the code is generated for a capturing bracket
113 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
114 we have the following regular expression:
115
116 A(B|C)D
117
118 The generated code will be the following:
119
120 A matching path
121 '(' matching path (pushing arguments to the stack)
122 B matching path
123 ')' matching path (pushing arguments to the stack)
124 D matching path
125 return with successful match
126
127 D backtrack path
128 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
129 B backtrack path
130 C expected path
131 jump to D matching path
132 C backtrack path
133 A backtrack path
134
135 Notice that the order of the backtrack code paths is the opposite of the fast
136 code paths. In this way the topmost value on the stack always belongs
137 to the current backtrack code path. The backtrack path must check
138 whether there is a next alternative. If so, it needs to jump back to
139 the matching path eventually. Otherwise it needs to clear out its own stack
140 frame and continue the execution on the backtrack code paths.
141 */
142
143 /*
144 Saved stack frames:
145
146 Atomic blocks and asserts require reloading the values of private data
147 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
148 are not necessarily known at compile time, thus we need a dynamic restore
149 mechanism.
150
151 The stack frames are stored in a chain list, and have the following format:
152 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
153
154 Thus we can restore the private data to a particular point in the stack.
155 */
156
/* Argument block for a jit_function: the jit_function type declared further
down takes a pointer to this structure.
NOTE(review): the "pointers first" ordering below looks deliberate (member
offsets are presumably relied upon elsewhere) - confirm before reordering. */
157 typedef struct jit_arguments {
158 /* Pointers first. */
159 struct sljit_stack *stack;
160 const pcre_uchar *str;
161 const pcre_uchar *begin;
162 const pcre_uchar *end;
163 int *offsets;
164 pcre_uchar *uchar_ptr;
165 pcre_uchar *mark_ptr;
166 void *callout_data;
167 /* Everything else after. */
168 int real_offset_count;
169 int offset_count;
170 int call_limit;
171 pcre_uint8 notbol;
172 pcre_uint8 noteol;
173 pcre_uint8 notempty;
174 pcre_uint8 notempty_atstart;
175 } jit_arguments;
176
/* Per-pattern record holding one executable_funcs pointer and one
executable_sizes value for each of the JIT_NUMBER_OF_COMPILE_MODES modes,
together with a callback, its userdata and the top_bracket value. */
177 typedef struct executable_functions {
178 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
179 PUBL(jit_callback) callback;
180 void *userdata;
181 pcre_uint32 top_bracket;
182 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
183 } executable_functions;
184
/* Node of a singly linked list of sljit jumps. */
185 typedef struct jump_list {
186 struct sljit_jump *jump;
187 struct jump_list *next;
188 } jump_list;
189
/* Node of a singly linked list of stubs; each node pairs a start jump
with a quit label. */
190 typedef struct stub_list {
191 struct sljit_jump *start;
192 struct sljit_label *quit;
193 struct stub_list *next;
194 } stub_list;
195
/* Negative sentinels. The comment on get_framesize states that it returns a
frame_types value (always < 0) when no frame is needed. */
196 enum frame_types { no_frame = -1, no_stack = -2 };
197
/* Pointer to a function that takes a jit_arguments block and returns int. */
198 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
199
200 /* The following structure is the key data type for the recursive
201 code generator. It is allocated by compile_matchingpath, and contains
202 the arguments for compile_backtrackingpath. Must be the first member
203 of its descendants. */
204 typedef struct backtrack_common {
205 /* Concatenation stack. */
206 struct backtrack_common *prev;
207 jump_list *nextbacktracks;
208 /* Internal stack (for component operators). */
209 struct backtrack_common *top;
210 jump_list *topbacktracks;
211 /* Opcode pointer. */
212 pcre_uchar *cc;
213 } backtrack_common;
214
/* Backtrack record extending backtrack_common, which is kept as the
first member. */
215 typedef struct assert_backtrack {
216 backtrack_common common;
217 jump_list *condfailed;
218 /* Less than 0 (-1) if a frame is not needed. */
219 int framesize;
220 /* Points to our private memory word on the stack. */
221 int private_data_ptr;
222 /* For iterators. */
223 struct sljit_label *matchingpath;
224 } assert_backtrack;
225
/* Backtrack record for brackets; extends backtrack_common, which is kept
as the first member. */
226 typedef struct bracket_backtrack {
227 backtrack_common common;
228 /* Where to continue if an alternative is successfully matched. */
229 struct sljit_label *alternative_matchingpath;
230 /* For rmin and rmax iterators. */
231 struct sljit_label *recursive_matchingpath;
232 /* For greedy ? operator. */
233 struct sljit_label *zero_matchingpath;
234 /* Contains the branches of a failed condition. */
235 union {
236 /* Both for OP_COND, OP_SCOND. */
237 jump_list *condfailed;
238 assert_backtrack *assert;
239 /* For OP_ONCE. -1 if not needed. */
240 int framesize;
241 } u;
242 /* Points to our private memory word on the stack. */
243 int private_data_ptr;
244 } bracket_backtrack;
245
/* Backtrack record extending backtrack_common (first member) with a
private data pointer, a frame size and a stack size. */
246 typedef struct bracketpos_backtrack {
247 backtrack_common common;
248 /* Points to our private memory word on the stack. */
249 int private_data_ptr;
250 /* Reverting stack is needed. */
251 int framesize;
252 /* Allocated stack size. */
253 int stacksize;
254 } bracketpos_backtrack;
255
/* Backtrack record extending backtrack_common (first member) with a
matching path label. */
256 typedef struct braminzero_backtrack {
257 backtrack_common common;
258 struct sljit_label *matchingpath;
259 } braminzero_backtrack;
260
/* Backtrack record extending backtrack_common (first member) with the
label of the next iteration. */
261 typedef struct iterator_backtrack {
262 backtrack_common common;
263 /* Next iteration. */
264 struct sljit_label *matchingpath;
265 } iterator_backtrack;
266
/* Node of a singly linked list of recursion entries. */
267 typedef struct recurse_entry {
268 struct recurse_entry *next;
269 /* Contains the function entry. */
270 struct sljit_label *entry;
271 /* Collects the calls made before the function is created. */
272 jump_list *calls;
273 /* Points to the starting opcode. */
274 int start;
275 } recurse_entry;
276
/* Backtrack record extending backtrack_common (first member) with an
inlined_pattern flag. */
277 typedef struct recurse_backtrack {
278 backtrack_common common;
279 BOOL inlined_pattern;
280 } recurse_backtrack;
281
282 #define MAX_RANGE_SIZE 6
283
/* Shared state of one compilation. The functions in this file receive it as
their `common` argument, and several macros below (OVECTOR_START, OVECTOR_PRIV,
PRIVATE_DATA, DEFINE_COMPILER) expect a variable with that name in scope. */
284 typedef struct compiler_common {
285 /* The sljit generic compiler. */
286 struct sljit_compiler *compiler;
287 /* First byte code. */
288 pcre_uchar *start;
289 /* Maps private data offset to each opcode. */
290 int *private_data_ptrs;
291 /* Tells whether the capturing bracket is optimized. */
292 pcre_uint8 *optimized_cbracket;
293 /* Starting offset of private data for capturing brackets. */
294 int cbra_ptr;
295 /* Output vector starting point. Must be divisible by 2. */
296 int ovector_start;
297 /* Last known position of the requested byte. */
298 int req_char_ptr;
299 /* Head of the last recursion. */
300 int recursive_head_ptr;
301 /* First inspected character for partial matching. */
302 int start_used_ptr;
303 /* Starting pointer for partial soft matches. */
304 int hit_start;
305 /* End pointer of the first line. */
306 int first_line_end;
307 /* Points to the marked string. */
308 int mark_ptr;
309 /* Points to the last matched capture block index. */
310 int capture_last_ptr;
311 /* Points to the starting position of the current match. */
312 int start_ptr;
313
314 /* Flipped and lower case tables. */
315 const pcre_uint8 *fcc;
316 sljit_sw lcc;
317 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
318 int mode;
319 /* \K is in the pattern. */
320 BOOL has_set_som;
321 /* Needs to know the start position anytime. */
322 BOOL needs_start_ptr;
323 /* Currently in compile_recurse. */
324 BOOL in_recurse;
325 /* Newline control. */
326 int nltype;
327 int newline;
328 int bsr_nltype;
329 /* Dollar endonly. */
330 int endonly;
331 /* Tables. */
332 sljit_sw ctypes;
333 int digits[2 + MAX_RANGE_SIZE];
334 /* Named capturing brackets. */
335 sljit_uw name_table;
336 sljit_sw name_count;
337 sljit_sw name_entry_size;
338
339 /* Labels and jump lists. */
340 struct sljit_label *partialmatchlabel;
341 struct sljit_label *quit_label;
342 struct sljit_label *forced_quit_label;
343 struct sljit_label *accept_label;
344 stub_list *stubs;
345 recurse_entry *entries;
346 recurse_entry *currententry;
347 jump_list *partialmatch;
348 jump_list *quit;
349 jump_list *forced_quit;
350 jump_list *accept;
351 jump_list *calllimit;
352 jump_list *stackalloc;
353 jump_list *revertframes;
354 jump_list *wordboundary;
355 jump_list *anynewline;
356 jump_list *hspace;
357 jump_list *vspace;
358 jump_list *casefulcmp;
359 jump_list *caselesscmp;
360 jump_list *reset_match;
361 BOOL jscript_compat;
362 #ifdef SUPPORT_UTF
363 BOOL utf;
364 #ifdef SUPPORT_UCP
365 BOOL use_ucp;
366 #endif
367 #ifndef COMPILE_PCRE32
368 jump_list *utfreadchar;
369 #endif
370 #ifdef COMPILE_PCRE8
371 jump_list *utfreadtype8;
372 #endif
373 #endif /* SUPPORT_UTF */
374 #ifdef SUPPORT_UCP
375 jump_list *getucd;
376 #endif
377 } compiler_common;
378
379 /* For byte_sequence_compare. The ucharptr member and the c / oc unions
are only present when SLJIT_UNALIGNED is enabled; the element type and count
of asuchars depend on the compiled character width (8, 16 or 32 bit). */
380
381 typedef struct compare_context {
382 int length;
383 int sourcereg;
384 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
385 int ucharptr;
386 union {
387 sljit_si asint;
388 sljit_uh asushort;
389 #if defined COMPILE_PCRE8
390 sljit_ub asbyte;
391 sljit_ub asuchars[4];
392 #elif defined COMPILE_PCRE16
393 sljit_uh asuchars[2];
394 #elif defined COMPILE_PCRE32
395 sljit_ui asuchars[1];
396 #endif
397 } c;
398 union {
399 sljit_si asint;
400 sljit_uh asushort;
401 #if defined COMPILE_PCRE8
402 sljit_ub asbyte;
403 sljit_ub asuchars[4];
404 #elif defined COMPILE_PCRE16
405 sljit_uh asuchars[2];
406 #elif defined COMPILE_PCRE32
407 sljit_ui asuchars[1];
408 #endif
409 } oc;
410 #endif
411 } compare_context;
412
413 /* Undefine sljit macros. */
414 #undef CMP
415
416 /* Used for accessing the elements of the stack. */
417 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
418
419 #define TMP1 SLJIT_SCRATCH_REG1
420 #define TMP2 SLJIT_SCRATCH_REG3
421 #define TMP3 SLJIT_TEMPORARY_EREG2
422 #define STR_PTR SLJIT_SAVED_REG1
423 #define STR_END SLJIT_SAVED_REG2
424 #define STACK_TOP SLJIT_SCRATCH_REG2
425 #define STACK_LIMIT SLJIT_SAVED_REG3
426 #define ARGUMENTS SLJIT_SAVED_EREG1
427 #define CALL_COUNT SLJIT_SAVED_EREG2
428 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
429
430 /* Local space layout. */
431 /* These two locals can be used by the current opcode. */
432 #define LOCALS0 (0 * sizeof(sljit_sw))
433 #define LOCALS1 (1 * sizeof(sljit_sw))
434 /* Two local variables for possessive quantifiers (char1 cannot use them). */
435 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
436 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
437 /* Max limit of recursions. */
438 #define CALL_LIMIT (4 * sizeof(sljit_sw))
439 /* The output vector is stored on the stack, and contains pointers
440 to characters. The vector data is divided into two groups: the first
441 group contains the start / end character pointers, and the second is
442 the start pointers when the end of the capturing group has not yet been reached. */
443 #define OVECTOR_START (common->ovector_start)
444 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw))
445 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * sizeof(sljit_sw))
446 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
447
448 #if defined COMPILE_PCRE8
449 #define MOV_UCHAR SLJIT_MOV_UB
450 #define MOVU_UCHAR SLJIT_MOVU_UB
451 #elif defined COMPILE_PCRE16
452 #define MOV_UCHAR SLJIT_MOV_UH
453 #define MOVU_UCHAR SLJIT_MOVU_UH
454 #elif defined COMPILE_PCRE32
455 #define MOV_UCHAR SLJIT_MOV_UI
456 #define MOVU_UCHAR SLJIT_MOVU_UI
457 #else
458 #error Unsupported compiling mode
459 #endif
460
461 /* Shortcuts. */
462 #define DEFINE_COMPILER \
463 struct sljit_compiler *compiler = common->compiler
464 #define OP1(op, dst, dstw, src, srcw) \
465 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
466 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
467 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
468 #define LABEL() \
469 sljit_emit_label(compiler)
470 #define JUMP(type) \
471 sljit_emit_jump(compiler, (type))
472 #define JUMPTO(type, label) \
473 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
474 #define JUMPHERE(jump) \
475 sljit_set_label((jump), sljit_emit_label(compiler))
476 #define SET_LABEL(jump, label) \
477 sljit_set_label((jump), (label))
478 #define CMP(type, src1, src1w, src2, src2w) \
479 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
480 #define CMPTO(type, src1, src1w, src2, src2w, label) \
481 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
482 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
483 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
484 #define GET_LOCAL_BASE(dst, dstw, offset) \
485 sljit_get_local_base(compiler, (dst), (dstw), (offset))
486
487 static pcre_uchar* bracketend(pcre_uchar* cc)
488 {
489 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
490 do cc += GET(cc, 1); while (*cc == OP_ALT);
491 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
492 cc += 1 + LINK_SIZE;
493 return cc;
494 }
495
496 /* Functions which might need modification for every newly supported opcode:
497 next_opcode
498 get_private_data_length
499 set_private_data_ptrs
500 get_framesize
501 init_frame
502 get_private_data_length_for_copy
503 copy_private_data
504 compile_matchingpath
505 compile_backtrackingpath
506 */
507
508 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
509 {
510 SLJIT_UNUSED_ARG(common);
511 switch(*cc)
512 {
513 case OP_SOD:
514 case OP_SOM:
515 case OP_SET_SOM:
516 case OP_NOT_WORD_BOUNDARY:
517 case OP_WORD_BOUNDARY:
518 case OP_NOT_DIGIT:
519 case OP_DIGIT:
520 case OP_NOT_WHITESPACE:
521 case OP_WHITESPACE:
522 case OP_NOT_WORDCHAR:
523 case OP_WORDCHAR:
524 case OP_ANY:
525 case OP_ALLANY:
526 case OP_NOTPROP:
527 case OP_PROP:
528 case OP_ANYNL:
529 case OP_NOT_HSPACE:
530 case OP_HSPACE:
531 case OP_NOT_VSPACE:
532 case OP_VSPACE:
533 case OP_EXTUNI:
534 case OP_EODN:
535 case OP_EOD:
536 case OP_CIRC:
537 case OP_CIRCM:
538 case OP_DOLL:
539 case OP_DOLLM:
540 case OP_CRSTAR:
541 case OP_CRMINSTAR:
542 case OP_CRPLUS:
543 case OP_CRMINPLUS:
544 case OP_CRQUERY:
545 case OP_CRMINQUERY:
546 case OP_CRRANGE:
547 case OP_CRMINRANGE:
548 case OP_CLASS:
549 case OP_NCLASS:
550 case OP_REF:
551 case OP_REFI:
552 case OP_RECURSE:
553 case OP_CALLOUT:
554 case OP_ALT:
555 case OP_KET:
556 case OP_KETRMAX:
557 case OP_KETRMIN:
558 case OP_KETRPOS:
559 case OP_REVERSE:
560 case OP_ASSERT:
561 case OP_ASSERT_NOT:
562 case OP_ASSERTBACK:
563 case OP_ASSERTBACK_NOT:
564 case OP_ONCE:
565 case OP_ONCE_NC:
566 case OP_BRA:
567 case OP_BRAPOS:
568 case OP_CBRA:
569 case OP_CBRAPOS:
570 case OP_COND:
571 case OP_SBRA:
572 case OP_SBRAPOS:
573 case OP_SCBRA:
574 case OP_SCBRAPOS:
575 case OP_SCOND:
576 case OP_CREF:
577 case OP_NCREF:
578 case OP_RREF:
579 case OP_NRREF:
580 case OP_DEF:
581 case OP_BRAZERO:
582 case OP_BRAMINZERO:
583 case OP_BRAPOSZERO:
584 case OP_PRUNE:
585 case OP_COMMIT:
586 case OP_FAIL:
587 case OP_ACCEPT:
588 case OP_ASSERT_ACCEPT:
589 case OP_CLOSE:
590 case OP_SKIPZERO:
591 return cc + PRIV(OP_lengths)[*cc];
592
593 case OP_CHAR:
594 case OP_CHARI:
595 case OP_NOT:
596 case OP_NOTI:
597 case OP_STAR:
598 case OP_MINSTAR:
599 case OP_PLUS:
600 case OP_MINPLUS:
601 case OP_QUERY:
602 case OP_MINQUERY:
603 case OP_UPTO:
604 case OP_MINUPTO:
605 case OP_EXACT:
606 case OP_POSSTAR:
607 case OP_POSPLUS:
608 case OP_POSQUERY:
609 case OP_POSUPTO:
610 case OP_STARI:
611 case OP_MINSTARI:
612 case OP_PLUSI:
613 case OP_MINPLUSI:
614 case OP_QUERYI:
615 case OP_MINQUERYI:
616 case OP_UPTOI:
617 case OP_MINUPTOI:
618 case OP_EXACTI:
619 case OP_POSSTARI:
620 case OP_POSPLUSI:
621 case OP_POSQUERYI:
622 case OP_POSUPTOI:
623 case OP_NOTSTAR:
624 case OP_NOTMINSTAR:
625 case OP_NOTPLUS:
626 case OP_NOTMINPLUS:
627 case OP_NOTQUERY:
628 case OP_NOTMINQUERY:
629 case OP_NOTUPTO:
630 case OP_NOTMINUPTO:
631 case OP_NOTEXACT:
632 case OP_NOTPOSSTAR:
633 case OP_NOTPOSPLUS:
634 case OP_NOTPOSQUERY:
635 case OP_NOTPOSUPTO:
636 case OP_NOTSTARI:
637 case OP_NOTMINSTARI:
638 case OP_NOTPLUSI:
639 case OP_NOTMINPLUSI:
640 case OP_NOTQUERYI:
641 case OP_NOTMINQUERYI:
642 case OP_NOTUPTOI:
643 case OP_NOTMINUPTOI:
644 case OP_NOTEXACTI:
645 case OP_NOTPOSSTARI:
646 case OP_NOTPOSPLUSI:
647 case OP_NOTPOSQUERYI:
648 case OP_NOTPOSUPTOI:
649 cc += PRIV(OP_lengths)[*cc];
650 #ifdef SUPPORT_UTF
651 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
652 #endif
653 return cc;
654
655 /* Special cases. */
656 case OP_TYPESTAR:
657 case OP_TYPEMINSTAR:
658 case OP_TYPEPLUS:
659 case OP_TYPEMINPLUS:
660 case OP_TYPEQUERY:
661 case OP_TYPEMINQUERY:
662 case OP_TYPEUPTO:
663 case OP_TYPEMINUPTO:
664 case OP_TYPEEXACT:
665 case OP_TYPEPOSSTAR:
666 case OP_TYPEPOSPLUS:
667 case OP_TYPEPOSQUERY:
668 case OP_TYPEPOSUPTO:
669 return cc + PRIV(OP_lengths)[*cc] - 1;
670
671 case OP_ANYBYTE:
672 #ifdef SUPPORT_UTF
673 if (common->utf) return NULL;
674 #endif
675 return cc + 1;
676
677 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
678 case OP_XCLASS:
679 return cc + GET(cc, 1);
680 #endif
681
682 case OP_MARK:
683 case OP_PRUNE_ARG:
684 return cc + 1 + 2 + cc[1];
685
686 default:
687 return NULL;
688 }
689 }
690
/* Groups of iterator opcodes, each expanding to a run of case labels that
must be followed by the statements for the group. They are used by
get_private_data_length and set_private_data_ptrs, where the numeric suffix
matches the number of private data words reserved (1 or 2; for the TYPE_2A
and TYPE_2B groups the two words are only reserved when the repeated type is
neither OP_ANYNL nor OP_EXTUNI). The 2B groups are the ones whose opcode
carries an additional IMM2_SIZE operand. */
691 #define CASE_ITERATOR_PRIVATE_DATA_1 \
692 case OP_MINSTAR: \
693 case OP_MINPLUS: \
694 case OP_QUERY: \
695 case OP_MINQUERY: \
696 case OP_MINSTARI: \
697 case OP_MINPLUSI: \
698 case OP_QUERYI: \
699 case OP_MINQUERYI: \
700 case OP_NOTMINSTAR: \
701 case OP_NOTMINPLUS: \
702 case OP_NOTQUERY: \
703 case OP_NOTMINQUERY: \
704 case OP_NOTMINSTARI: \
705 case OP_NOTMINPLUSI: \
706 case OP_NOTQUERYI: \
707 case OP_NOTMINQUERYI:
708
709 #define CASE_ITERATOR_PRIVATE_DATA_2A \
710 case OP_STAR: \
711 case OP_PLUS: \
712 case OP_STARI: \
713 case OP_PLUSI: \
714 case OP_NOTSTAR: \
715 case OP_NOTPLUS: \
716 case OP_NOTSTARI: \
717 case OP_NOTPLUSI:
718
719 #define CASE_ITERATOR_PRIVATE_DATA_2B \
720 case OP_UPTO: \
721 case OP_MINUPTO: \
722 case OP_UPTOI: \
723 case OP_MINUPTOI: \
724 case OP_NOTUPTO: \
725 case OP_NOTMINUPTO: \
726 case OP_NOTUPTOI: \
727 case OP_NOTMINUPTOI:
728
729 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
730 case OP_TYPEMINSTAR: \
731 case OP_TYPEMINPLUS: \
732 case OP_TYPEQUERY: \
733 case OP_TYPEMINQUERY:
734
735 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
736 case OP_TYPESTAR: \
737 case OP_TYPEPLUS:
738
739 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
740 case OP_TYPEUPTO: \
741 case OP_TYPEMINUPTO:
742
743 static int get_class_iterator_size(pcre_uchar *cc)
744 {
745 switch(*cc)
746 {
747 case OP_CRSTAR:
748 case OP_CRPLUS:
749 return 2;
750
751 case OP_CRMINSTAR:
752 case OP_CRMINPLUS:
753 case OP_CRQUERY:
754 case OP_CRMINQUERY:
755 return 1;
756
757 case OP_CRRANGE:
758 case OP_CRMINRANGE:
759 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
760 return 0;
761 return 2;
762
763 default:
764 return 0;
765 }
766 }
767
768 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
769 {
770 int private_data_length = 0;
771 pcre_uchar *alternative;
772 pcre_uchar *name;
773 pcre_uchar *end = NULL;
774 int space, size, i;
775 pcre_uint32 bracketlen;
776
777 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
778 while (cc < ccend)
779 {
780 space = 0;
781 size = 0;
782 bracketlen = 0;
783 switch(*cc)
784 {
785 case OP_SET_SOM:
786 common->has_set_som = TRUE;
787 cc += 1;
788 break;
789
790 case OP_REF:
791 case OP_REFI:
792 common->optimized_cbracket[GET2(cc, 1)] = 0;
793 cc += 1 + IMM2_SIZE;
794 break;
795
796 case OP_ASSERT:
797 case OP_ASSERT_NOT:
798 case OP_ASSERTBACK:
799 case OP_ASSERTBACK_NOT:
800 case OP_ONCE:
801 case OP_ONCE_NC:
802 case OP_BRAPOS:
803 case OP_SBRA:
804 case OP_SBRAPOS:
805 private_data_length += sizeof(sljit_sw);
806 bracketlen = 1 + LINK_SIZE;
807 break;
808
809 case OP_CBRAPOS:
810 case OP_SCBRAPOS:
811 private_data_length += sizeof(sljit_sw);
812 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
813 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
814 break;
815
816 case OP_COND:
817 case OP_SCOND:
818 /* Only AUTO_CALLOUT can insert this opcode. We do
819 not intend to support this case. */
820 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
821 return -1;
822
823 if (*cc == OP_COND)
824 {
825 /* Might be a hidden SCOND. */
826 alternative = cc + GET(cc, 1);
827 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
828 private_data_length += sizeof(sljit_sw);
829 }
830 else
831 private_data_length += sizeof(sljit_sw);
832 bracketlen = 1 + LINK_SIZE;
833 break;
834
835 case OP_CREF:
836 i = GET2(cc, 1);
837 common->optimized_cbracket[i] = 0;
838 cc += 1 + IMM2_SIZE;
839 break;
840
841 case OP_NCREF:
842 bracketlen = GET2(cc, 1);
843 name = (pcre_uchar *)common->name_table;
844 alternative = name;
845 for (i = 0; i < common->name_count; i++)
846 {
847 if (GET2(name, 0) == bracketlen) break;
848 name += common->name_entry_size;
849 }
850 SLJIT_ASSERT(i != common->name_count);
851
852 for (i = 0; i < common->name_count; i++)
853 {
854 if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
855 common->optimized_cbracket[GET2(alternative, 0)] = 0;
856 alternative += common->name_entry_size;
857 }
858 bracketlen = 0;
859 cc += 1 + IMM2_SIZE;
860 break;
861
862 case OP_BRA:
863 bracketlen = 1 + LINK_SIZE;
864 break;
865
866 case OP_CBRA:
867 case OP_SCBRA:
868 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
869 break;
870
871 CASE_ITERATOR_PRIVATE_DATA_1
872 space = 1;
873 size = -2;
874 break;
875
876 CASE_ITERATOR_PRIVATE_DATA_2A
877 space = 2;
878 size = -2;
879 break;
880
881 CASE_ITERATOR_PRIVATE_DATA_2B
882 space = 2;
883 size = -(2 + IMM2_SIZE);
884 break;
885
886 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
887 space = 1;
888 size = 1;
889 break;
890
891 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
892 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
893 space = 2;
894 size = 1;
895 break;
896
897 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
898 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
899 space = 2;
900 size = 1 + IMM2_SIZE;
901 break;
902
903 case OP_CLASS:
904 case OP_NCLASS:
905 size += 1 + 32 / sizeof(pcre_uchar);
906 space = get_class_iterator_size(cc + size);
907 break;
908
909 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
910 case OP_XCLASS:
911 size = GET(cc, 1);
912 space = get_class_iterator_size(cc + size);
913 break;
914 #endif
915
916 case OP_RECURSE:
917 /* Set its value only once. */
918 if (common->recursive_head_ptr == 0)
919 {
920 common->recursive_head_ptr = common->ovector_start;
921 common->ovector_start += sizeof(sljit_sw);
922 }
923 cc += 1 + LINK_SIZE;
924 break;
925
926 case OP_CALLOUT:
927 if (common->capture_last_ptr == 0)
928 {
929 common->capture_last_ptr = common->ovector_start;
930 common->ovector_start += sizeof(sljit_sw);
931 }
932 cc += 2 + 2 * LINK_SIZE;
933 break;
934
935 case OP_PRUNE_ARG:
936 common->needs_start_ptr = TRUE;
937 /* Fall through. */
938
939 case OP_MARK:
940 if (common->mark_ptr == 0)
941 {
942 common->mark_ptr = common->ovector_start;
943 common->ovector_start += sizeof(sljit_sw);
944 }
945 cc += 1 + 2 + cc[1];
946 break;
947
948 case OP_PRUNE:
949 common->needs_start_ptr = TRUE;
950 cc += 1;
951 break;
952
953 default:
954 cc = next_opcode(common, cc);
955 if (cc == NULL)
956 return -1;
957 break;
958 }
959
960 if (space > 0 && cc >= end)
961 private_data_length += sizeof(sljit_sw) * space;
962
963 if (size != 0)
964 {
965 if (size < 0)
966 {
967 cc += -size;
968 #ifdef SUPPORT_UTF
969 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
970 #endif
971 }
972 else
973 cc += size;
974 }
975
976 if (bracketlen != 0)
977 {
978 if (cc >= end)
979 {
980 end = bracketend(cc);
981 if (end[-1 - LINK_SIZE] == OP_KET)
982 end = NULL;
983 }
984 cc += bracketlen;
985 }
986 }
987 return private_data_length;
988 }
989
990 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
991 {
992 pcre_uchar *cc = common->start;
993 pcre_uchar *alternative;
994 pcre_uchar *end = NULL;
995 int space, size, bracketlen;
996
997 while (cc < ccend)
998 {
999 space = 0;
1000 size = 0;
1001 bracketlen = 0;
1002 switch(*cc)
1003 {
1004 case OP_ASSERT:
1005 case OP_ASSERT_NOT:
1006 case OP_ASSERTBACK:
1007 case OP_ASSERTBACK_NOT:
1008 case OP_ONCE:
1009 case OP_ONCE_NC:
1010 case OP_BRAPOS:
1011 case OP_SBRA:
1012 case OP_SBRAPOS:
1013 case OP_SCOND:
1014 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1015 private_data_ptr += sizeof(sljit_sw);
1016 bracketlen = 1 + LINK_SIZE;
1017 break;
1018
1019 case OP_CBRAPOS:
1020 case OP_SCBRAPOS:
1021 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1022 private_data_ptr += sizeof(sljit_sw);
1023 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1024 break;
1025
1026 case OP_COND:
1027 /* Might be a hidden SCOND. */
1028 alternative = cc + GET(cc, 1);
1029 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1030 {
1031 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1032 private_data_ptr += sizeof(sljit_sw);
1033 }
1034 bracketlen = 1 + LINK_SIZE;
1035 break;
1036
1037 case OP_BRA:
1038 bracketlen = 1 + LINK_SIZE;
1039 break;
1040
1041 case OP_CBRA:
1042 case OP_SCBRA:
1043 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1044 break;
1045
1046 CASE_ITERATOR_PRIVATE_DATA_1
1047 space = 1;
1048 size = -2;
1049 break;
1050
1051 CASE_ITERATOR_PRIVATE_DATA_2A
1052 space = 2;
1053 size = -2;
1054 break;
1055
1056 CASE_ITERATOR_PRIVATE_DATA_2B
1057 space = 2;
1058 size = -(2 + IMM2_SIZE);
1059 break;
1060
1061 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1062 space = 1;
1063 size = 1;
1064 break;
1065
1066 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1067 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1068 space = 2;
1069 size = 1;
1070 break;
1071
1072 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1073 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1074 space = 2;
1075 size = 1 + IMM2_SIZE;
1076 break;
1077
1078 case OP_CLASS:
1079 case OP_NCLASS:
1080 size += 1 + 32 / sizeof(pcre_uchar);
1081 space = get_class_iterator_size(cc + size);
1082 break;
1083
1084 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1085 case OP_XCLASS:
1086 size = GET(cc, 1);
1087 space = get_class_iterator_size(cc + size);
1088 break;
1089 #endif
1090
1091 default:
1092 cc = next_opcode(common, cc);
1093 SLJIT_ASSERT(cc != NULL);
1094 break;
1095 }
1096
1097 if (space > 0 && cc >= end)
1098 {
1099 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1100 private_data_ptr += sizeof(sljit_sw) * space;
1101 }
1102
1103 if (size != 0)
1104 {
1105 if (size < 0)
1106 {
1107 cc += -size;
1108 #ifdef SUPPORT_UTF
1109 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1110 #endif
1111 }
1112 else
1113 cc += size;
1114 }
1115
1116 if (bracketlen > 0)
1117 {
1118 if (cc >= end)
1119 {
1120 end = bracketend(cc);
1121 if (end[-1 - LINK_SIZE] == OP_KET)
1122 end = NULL;
1123 }
1124 cc += bracketlen;
1125 }
1126 }
1127 }
1128
1129 /* Returns with a frame_types (always < 0) if no need for frame. */
1130 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1131 {
1132 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
1133 int length = 0;
1134 int possessive = 0;
1135 BOOL stack_restore = FALSE;
1136 BOOL setsom_found = recursive;
1137 BOOL setmark_found = recursive;
1138 /* The last capture is a local variable even for recursions. */
1139 BOOL capture_last_found = FALSE;
1140
1141 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1142 {
1143 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1144 /* This is correct regardless of common->capture_last_ptr. */
1145 capture_last_found = TRUE;
1146 }
1147
1148 cc = next_opcode(common, cc);
1149 SLJIT_ASSERT(cc != NULL);
1150 while (cc < ccend)
1151 switch(*cc)
1152 {
1153 case OP_SET_SOM:
1154 SLJIT_ASSERT(common->has_set_som);
1155 stack_restore = TRUE;
1156 if (!setsom_found)
1157 {
1158 length += 2;
1159 setsom_found = TRUE;
1160 }
1161 cc += 1;
1162 break;
1163
1164 case OP_MARK:
1165 case OP_PRUNE_ARG:
1166 SLJIT_ASSERT(common->mark_ptr != 0);
1167 stack_restore = TRUE;
1168 if (!setmark_found)
1169 {
1170 length += 2;
1171 setmark_found = TRUE;
1172 }
1173 cc += 1 + 2 + cc[1];
1174 break;
1175
1176 case OP_RECURSE:
1177 stack_restore = TRUE;
1178 if (common->has_set_som && !setsom_found)
1179 {
1180 length += 2;
1181 setsom_found = TRUE;
1182 }
1183 if (common->mark_ptr != 0 && !setmark_found)
1184 {
1185 length += 2;
1186 setmark_found = TRUE;
1187 }
1188 if (common->capture_last_ptr != 0 && !capture_last_found)
1189 {
1190 length += 2;
1191 capture_last_found = TRUE;
1192 }
1193 cc += 1 + LINK_SIZE;
1194 break;
1195
1196 case OP_CBRA:
1197 case OP_CBRAPOS:
1198 case OP_SCBRA:
1199 case OP_SCBRAPOS:
1200 stack_restore = TRUE;
1201 if (common->capture_last_ptr != 0 && !capture_last_found)
1202 {
1203 length += 2;
1204 capture_last_found = TRUE;
1205 }
1206 length += 3;
1207 cc += 1 + LINK_SIZE + IMM2_SIZE;
1208 break;
1209
1210 default:
1211 stack_restore = TRUE;
1212 /* Fall through. */
1213
1214 case OP_NOT_WORD_BOUNDARY:
1215 case OP_WORD_BOUNDARY:
1216 case OP_NOT_DIGIT:
1217 case OP_DIGIT:
1218 case OP_NOT_WHITESPACE:
1219 case OP_WHITESPACE:
1220 case OP_NOT_WORDCHAR:
1221 case OP_WORDCHAR:
1222 case OP_ANY:
1223 case OP_ALLANY:
1224 case OP_ANYBYTE:
1225 case OP_NOTPROP:
1226 case OP_PROP:
1227 case OP_ANYNL:
1228 case OP_NOT_HSPACE:
1229 case OP_HSPACE:
1230 case OP_NOT_VSPACE:
1231 case OP_VSPACE:
1232 case OP_EXTUNI:
1233 case OP_EODN:
1234 case OP_EOD:
1235 case OP_CIRC:
1236 case OP_CIRCM:
1237 case OP_DOLL:
1238 case OP_DOLLM:
1239 case OP_CHAR:
1240 case OP_CHARI:
1241 case OP_NOT:
1242 case OP_NOTI:
1243
1244 case OP_EXACT:
1245 case OP_POSSTAR:
1246 case OP_POSPLUS:
1247 case OP_POSQUERY:
1248 case OP_POSUPTO:
1249
1250 case OP_EXACTI:
1251 case OP_POSSTARI:
1252 case OP_POSPLUSI:
1253 case OP_POSQUERYI:
1254 case OP_POSUPTOI:
1255
1256 case OP_NOTEXACT:
1257 case OP_NOTPOSSTAR:
1258 case OP_NOTPOSPLUS:
1259 case OP_NOTPOSQUERY:
1260 case OP_NOTPOSUPTO:
1261
1262 case OP_NOTEXACTI:
1263 case OP_NOTPOSSTARI:
1264 case OP_NOTPOSPLUSI:
1265 case OP_NOTPOSQUERYI:
1266 case OP_NOTPOSUPTOI:
1267
1268 case OP_TYPEEXACT:
1269 case OP_TYPEPOSSTAR:
1270 case OP_TYPEPOSPLUS:
1271 case OP_TYPEPOSQUERY:
1272 case OP_TYPEPOSUPTO:
1273
1274 case OP_CLASS:
1275 case OP_NCLASS:
1276 case OP_XCLASS:
1277
1278 cc = next_opcode(common, cc);
1279 SLJIT_ASSERT(cc != NULL);
1280 break;
1281 }
1282
1283 /* Possessive quantifiers can use a special case. */
1284 if (SLJIT_UNLIKELY(possessive == length))
1285 return stack_restore ? no_frame : no_stack;
1286
1287 if (length > 0)
1288 return length + 1;
1289 return stack_restore ? no_frame : no_stack;
1290 }
1291
1292 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1293 {
1294 DEFINE_COMPILER;
1295 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
1296 BOOL setsom_found = recursive;
1297 BOOL setmark_found = recursive;
1298 /* The last capture is a local variable even for recursions. */
1299 BOOL capture_last_found = FALSE;
1300 int offset;
1301
1302 /* >= 1 + shortest item size (2) */
1303 SLJIT_UNUSED_ARG(stacktop);
1304 SLJIT_ASSERT(stackpos >= stacktop + 2);
1305
1306 stackpos = STACK(stackpos);
1307 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1308 cc = next_opcode(common, cc);
1309 SLJIT_ASSERT(cc != NULL);
1310 while (cc < ccend)
1311 switch(*cc)
1312 {
1313 case OP_SET_SOM:
1314 SLJIT_ASSERT(common->has_set_som);
1315 if (!setsom_found)
1316 {
1317 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1318 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1319 stackpos += (int)sizeof(sljit_sw);
1320 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1321 stackpos += (int)sizeof(sljit_sw);
1322 setsom_found = TRUE;
1323 }
1324 cc += 1;
1325 break;
1326
1327 case OP_MARK:
1328 case OP_PRUNE_ARG:
1329 SLJIT_ASSERT(common->mark_ptr != 0);
1330 if (!setmark_found)
1331 {
1332 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1333 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1334 stackpos += (int)sizeof(sljit_sw);
1335 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1336 stackpos += (int)sizeof(sljit_sw);
1337 setmark_found = TRUE;
1338 }
1339 cc += 1 + 2 + cc[1];
1340 break;
1341
1342 case OP_RECURSE:
1343 if (common->has_set_som && !setsom_found)
1344 {
1345 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1346 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1347 stackpos += (int)sizeof(sljit_sw);
1348 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1349 stackpos += (int)sizeof(sljit_sw);
1350 setsom_found = TRUE;
1351 }
1352 if (common->mark_ptr != 0 && !setmark_found)
1353 {
1354 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1355 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1356 stackpos += (int)sizeof(sljit_sw);
1357 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1358 stackpos += (int)sizeof(sljit_sw);
1359 setmark_found = TRUE;
1360 }
1361 if (common->capture_last_ptr != 0 && !capture_last_found)
1362 {
1363 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1364 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1365 stackpos += (int)sizeof(sljit_sw);
1366 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1367 stackpos += (int)sizeof(sljit_sw);
1368 capture_last_found = TRUE;
1369 }
1370 cc += 1 + LINK_SIZE;
1371 break;
1372
1373 case OP_CBRA:
1374 case OP_CBRAPOS:
1375 case OP_SCBRA:
1376 case OP_SCBRAPOS:
1377 if (common->capture_last_ptr != 0 && !capture_last_found)
1378 {
1379 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1380 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1381 stackpos += (int)sizeof(sljit_sw);
1382 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1383 stackpos += (int)sizeof(sljit_sw);
1384 capture_last_found = TRUE;
1385 }
1386 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1387 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1388 stackpos += (int)sizeof(sljit_sw);
1389 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1390 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1391 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1392 stackpos += (int)sizeof(sljit_sw);
1393 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1394 stackpos += (int)sizeof(sljit_sw);
1395
1396 cc += 1 + LINK_SIZE + IMM2_SIZE;
1397 break;
1398
1399 default:
1400 cc = next_opcode(common, cc);
1401 SLJIT_ASSERT(cc != NULL);
1402 break;
1403 }
1404
1405 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1406 SLJIT_ASSERT(stackpos == STACK(stacktop));
1407 }
1408
1409 static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1410 {
1411 int private_data_length = 2;
1412 int size;
1413 pcre_uchar *alternative;
1414 /* Calculate the sum of the private machine words. */
1415 while (cc < ccend)
1416 {
1417 size = 0;
1418 switch(*cc)
1419 {
1420 case OP_ASSERT:
1421 case OP_ASSERT_NOT:
1422 case OP_ASSERTBACK:
1423 case OP_ASSERTBACK_NOT:
1424 case OP_ONCE:
1425 case OP_ONCE_NC:
1426 case OP_BRAPOS:
1427 case OP_SBRA:
1428 case OP_SBRAPOS:
1429 case OP_SCOND:
1430 private_data_length++;
1431 cc += 1 + LINK_SIZE;
1432 break;
1433
1434 case OP_CBRA:
1435 case OP_SCBRA:
1436 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1437 private_data_length++;
1438 cc += 1 + LINK_SIZE + IMM2_SIZE;
1439 break;
1440
1441 case OP_CBRAPOS:
1442 case OP_SCBRAPOS:
1443 private_data_length += 2;
1444 cc += 1 + LINK_SIZE + IMM2_SIZE;
1445 break;
1446
1447 case OP_COND:
1448 /* Might be a hidden SCOND. */
1449 alternative = cc + GET(cc, 1);
1450 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1451 private_data_length++;
1452 cc += 1 + LINK_SIZE;
1453 break;
1454
1455 CASE_ITERATOR_PRIVATE_DATA_1
1456 if (PRIVATE_DATA(cc))
1457 private_data_length++;
1458 cc += 2;
1459 #ifdef SUPPORT_UTF
1460 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1461 #endif
1462 break;
1463
1464 CASE_ITERATOR_PRIVATE_DATA_2A
1465 if (PRIVATE_DATA(cc))
1466 private_data_length += 2;
1467 cc += 2;
1468 #ifdef SUPPORT_UTF
1469 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1470 #endif
1471 break;
1472
1473 CASE_ITERATOR_PRIVATE_DATA_2B
1474 if (PRIVATE_DATA(cc))
1475 private_data_length += 2;
1476 cc += 2 + IMM2_SIZE;
1477 #ifdef SUPPORT_UTF
1478 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1479 #endif
1480 break;
1481
1482 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1483 if (PRIVATE_DATA(cc))
1484 private_data_length++;
1485 cc += 1;
1486 break;
1487
1488 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1489 if (PRIVATE_DATA(cc))
1490 private_data_length += 2;
1491 cc += 1;
1492 break;
1493
1494 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1495 if (PRIVATE_DATA(cc))
1496 private_data_length += 2;
1497 cc += 1 + IMM2_SIZE;
1498 break;
1499
1500 case OP_CLASS:
1501 case OP_NCLASS:
1502 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1503 case OP_XCLASS:
1504 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1505 #else
1506 size = 1 + 32 / (int)sizeof(pcre_uchar);
1507 #endif
1508 if (PRIVATE_DATA(cc))
1509 private_data_length += get_class_iterator_size(cc + size);
1510 cc += size;
1511 break;
1512
1513 default:
1514 cc = next_opcode(common, cc);
1515 SLJIT_ASSERT(cc != NULL);
1516 break;
1517 }
1518 }
1519 SLJIT_ASSERT(cc == ccend);
1520 return private_data_length;
1521 }
1522
1523 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1524 BOOL save, int stackptr, int stacktop)
1525 {
1526 DEFINE_COMPILER;
1527 int srcw[2];
1528 int count, size;
1529 BOOL tmp1next = TRUE;
1530 BOOL tmp1empty = TRUE;
1531 BOOL tmp2empty = TRUE;
1532 pcre_uchar *alternative;
1533 enum {
1534 start,
1535 loop,
1536 end
1537 } status;
1538
1539 status = save ? start : loop;
1540 stackptr = STACK(stackptr - 2);
1541 stacktop = STACK(stacktop - 1);
1542
1543 if (!save)
1544 {
1545 stackptr += sizeof(sljit_sw);
1546 if (stackptr < stacktop)
1547 {
1548 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1549 stackptr += sizeof(sljit_sw);
1550 tmp1empty = FALSE;
1551 }
1552 if (stackptr < stacktop)
1553 {
1554 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1555 stackptr += sizeof(sljit_sw);
1556 tmp2empty = FALSE;
1557 }
1558 /* The tmp1next must be TRUE in either way. */
1559 }
1560
1561 while (status != end)
1562 {
1563 count = 0;
1564 switch(status)
1565 {
1566 case start:
1567 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1568 count = 1;
1569 srcw[0] = common->recursive_head_ptr;
1570 status = loop;
1571 break;
1572
1573 case loop:
1574 if (cc >= ccend)
1575 {
1576 status = end;
1577 break;
1578 }
1579
1580 switch(*cc)
1581 {
1582 case OP_ASSERT:
1583 case OP_ASSERT_NOT:
1584 case OP_ASSERTBACK:
1585 case OP_ASSERTBACK_NOT:
1586 case OP_ONCE:
1587 case OP_ONCE_NC:
1588 case OP_BRAPOS:
1589 case OP_SBRA:
1590 case OP_SBRAPOS:
1591 case OP_SCOND:
1592 count = 1;
1593 srcw[0] = PRIVATE_DATA(cc);
1594 SLJIT_ASSERT(srcw[0] != 0);
1595 cc += 1 + LINK_SIZE;
1596 break;
1597
1598 case OP_CBRA:
1599 case OP_SCBRA:
1600 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1601 {
1602 count = 1;
1603 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1604 }
1605 cc += 1 + LINK_SIZE + IMM2_SIZE;
1606 break;
1607
1608 case OP_CBRAPOS:
1609 case OP_SCBRAPOS:
1610 count = 2;
1611 srcw[0] = PRIVATE_DATA(cc);
1612 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1613 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1614 cc += 1 + LINK_SIZE + IMM2_SIZE;
1615 break;
1616
1617 case OP_COND:
1618 /* Might be a hidden SCOND. */
1619 alternative = cc + GET(cc, 1);
1620 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1621 {
1622 count = 1;
1623 srcw[0] = PRIVATE_DATA(cc);
1624 SLJIT_ASSERT(srcw[0] != 0);
1625 }
1626 cc += 1 + LINK_SIZE;
1627 break;
1628
1629 CASE_ITERATOR_PRIVATE_DATA_1
1630 if (PRIVATE_DATA(cc))
1631 {
1632 count = 1;
1633 srcw[0] = PRIVATE_DATA(cc);
1634 }
1635 cc += 2;
1636 #ifdef SUPPORT_UTF
1637 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1638 #endif
1639 break;
1640
1641 CASE_ITERATOR_PRIVATE_DATA_2A
1642 if (PRIVATE_DATA(cc))
1643 {
1644 count = 2;
1645 srcw[0] = PRIVATE_DATA(cc);
1646 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1647 }
1648 cc += 2;
1649 #ifdef SUPPORT_UTF
1650 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1651 #endif
1652 break;
1653
1654 CASE_ITERATOR_PRIVATE_DATA_2B
1655 if (PRIVATE_DATA(cc))
1656 {
1657 count = 2;
1658 srcw[0] = PRIVATE_DATA(cc);
1659 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1660 }
1661 cc += 2 + IMM2_SIZE;
1662 #ifdef SUPPORT_UTF
1663 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1664 #endif
1665 break;
1666
1667 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1668 if (PRIVATE_DATA(cc))
1669 {
1670 count = 1;
1671 srcw[0] = PRIVATE_DATA(cc);
1672 }
1673 cc += 1;
1674 break;
1675
1676 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1677 if (PRIVATE_DATA(cc))
1678 {
1679 count = 2;
1680 srcw[0] = PRIVATE_DATA(cc);
1681 srcw[1] = srcw[0] + sizeof(sljit_sw);
1682 }
1683 cc += 1;
1684 break;
1685
1686 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1687 if (PRIVATE_DATA(cc))
1688 {
1689 count = 2;
1690 srcw[0] = PRIVATE_DATA(cc);
1691 srcw[1] = srcw[0] + sizeof(sljit_sw);
1692 }
1693 cc += 1 + IMM2_SIZE;
1694 break;
1695
1696 case OP_CLASS:
1697 case OP_NCLASS:
1698 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1699 case OP_XCLASS:
1700 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1701 #else
1702 size = 1 + 32 / (int)sizeof(pcre_uchar);
1703 #endif
1704 if (PRIVATE_DATA(cc))
1705 switch(get_class_iterator_size(cc + size))
1706 {
1707 case 1:
1708 count = 1;
1709 srcw[0] = PRIVATE_DATA(cc);
1710 break;
1711
1712 case 2:
1713 count = 2;
1714 srcw[0] = PRIVATE_DATA(cc);
1715 srcw[1] = srcw[0] + sizeof(sljit_sw);
1716 break;
1717
1718 default:
1719 SLJIT_ASSERT_STOP();
1720 break;
1721 }
1722 cc += size;
1723 break;
1724
1725 default:
1726 cc = next_opcode(common, cc);
1727 SLJIT_ASSERT(cc != NULL);
1728 break;
1729 }
1730 break;
1731
1732 case end:
1733 SLJIT_ASSERT_STOP();
1734 break;
1735 }
1736
1737 while (count > 0)
1738 {
1739 count--;
1740 if (save)
1741 {
1742 if (tmp1next)
1743 {
1744 if (!tmp1empty)
1745 {
1746 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1747 stackptr += sizeof(sljit_sw);
1748 }
1749 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1750 tmp1empty = FALSE;
1751 tmp1next = FALSE;
1752 }
1753 else
1754 {
1755 if (!tmp2empty)
1756 {
1757 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1758 stackptr += sizeof(sljit_sw);
1759 }
1760 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1761 tmp2empty = FALSE;
1762 tmp1next = TRUE;
1763 }
1764 }
1765 else
1766 {
1767 if (tmp1next)
1768 {
1769 SLJIT_ASSERT(!tmp1empty);
1770 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1771 tmp1empty = stackptr >= stacktop;
1772 if (!tmp1empty)
1773 {
1774 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1775 stackptr += sizeof(sljit_sw);
1776 }
1777 tmp1next = FALSE;
1778 }
1779 else
1780 {
1781 SLJIT_ASSERT(!tmp2empty);
1782 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1783 tmp2empty = stackptr >= stacktop;
1784 if (!tmp2empty)
1785 {
1786 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1787 stackptr += sizeof(sljit_sw);
1788 }
1789 tmp1next = TRUE;
1790 }
1791 }
1792 }
1793 }
1794
1795 if (save)
1796 {
1797 if (tmp1next)
1798 {
1799 if (!tmp1empty)
1800 {
1801 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1802 stackptr += sizeof(sljit_sw);
1803 }
1804 if (!tmp2empty)
1805 {
1806 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1807 stackptr += sizeof(sljit_sw);
1808 }
1809 }
1810 else
1811 {
1812 if (!tmp2empty)
1813 {
1814 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1815 stackptr += sizeof(sljit_sw);
1816 }
1817 if (!tmp1empty)
1818 {
1819 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1820 stackptr += sizeof(sljit_sw);
1821 }
1822 }
1823 }
1824 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1825 }
1826
1827 #undef CASE_ITERATOR_PRIVATE_DATA_1
1828 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1829 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1830 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1831 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1832 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1833
1834 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1835 {
1836 return (value & (value - 1)) == 0;
1837 }
1838
1839 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1840 {
1841 while (list)
1842 {
1843 /* sljit_set_label is clever enough to do nothing
1844 if either the jump or the label is NULL. */
1845 SET_LABEL(list->jump, label);
1846 list = list->next;
1847 }
1848 }
1849
1850 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1851 {
1852 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1853 if (list_item)
1854 {
1855 list_item->next = *list;
1856 list_item->jump = jump;
1857 *list = list_item;
1858 }
1859 }
1860
1861 static void add_stub(compiler_common *common, struct sljit_jump *start)
1862 {
1863 DEFINE_COMPILER;
1864 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1865
1866 if (list_item)
1867 {
1868 list_item->start = start;
1869 list_item->quit = LABEL();
1870 list_item->next = common->stubs;
1871 common->stubs = list_item;
1872 }
1873 }
1874
1875 static void flush_stubs(compiler_common *common)
1876 {
1877 DEFINE_COMPILER;
1878 stub_list* list_item = common->stubs;
1879
1880 while (list_item)
1881 {
1882 JUMPHERE(list_item->start);
1883 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1884 JUMPTO(SLJIT_JUMP, list_item->quit);
1885 list_item = list_item->next;
1886 }
1887 common->stubs = NULL;
1888 }
1889
1890 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1891 {
1892 DEFINE_COMPILER;
1893
1894 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1895 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1896 }
1897
1898 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1899 {
1900 /* May destroy all locals and registers except TMP2. */
1901 DEFINE_COMPILER;
1902
1903 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1904 #ifdef DESTROY_REGISTERS
1905 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1906 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1907 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1908 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1909 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1910 #endif
1911 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1912 }
1913
1914 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1915 {
1916 DEFINE_COMPILER;
1917 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1918 }
1919
1920 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1921 {
1922 DEFINE_COMPILER;
1923 struct sljit_label *loop;
1924 int i;
1925
1926 /* At this point we can freely use all temporary registers. */
1927 SLJIT_ASSERT(length > 1);
1928 /* TMP1 returns with begin - 1. */
1929 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1930 if (length < 8)
1931 {
1932 for (i = 1; i < length; i++)
1933 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
1934 }
1935 else
1936 {
1937 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
1938 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
1939 loop = LABEL();
1940 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
1941 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
1942 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1943 }
1944 }
1945
1946 static void do_reset_match(compiler_common *common, int length)
1947 {
1948 DEFINE_COMPILER;
1949 struct sljit_label *loop;
1950 int i;
1951
1952 SLJIT_ASSERT(length > 1);
1953 /* OVECTOR(1) contains the "string begin - 1" constant. */
1954 if (length > 2)
1955 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1956 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
1957 if (length < 8)
1958 {
1959 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
1960 for (i = 2; i < length; i++)
1961 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
1962 }
1963 else
1964 {
1965 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
1966 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, length - 2);
1967 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
1968 loop = LABEL();
1969 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
1970 OP2(SLJIT_SUB | SLJIT_SET_E, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1971 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1972 }
1973 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
1974 }
1975
1976 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1977 {
1978 DEFINE_COMPILER;
1979 struct sljit_label *loop;
1980 struct sljit_jump *early_quit;
1981
1982 /* At this point we can freely use all registers. */
1983 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1984 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1985
1986 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
1987 if (common->mark_ptr != 0)
1988 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1989 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
1990 if (common->mark_ptr != 0)
1991 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
1992 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1993 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1994 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1995 /* Unlikely, but possible */
1996 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
1997 loop = LABEL();
1998 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
1999 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2000 /* Copy the integer value to the output buffer */
2001 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2002 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2003 #endif
2004 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2005 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2006 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2007 JUMPHERE(early_quit);
2008
2009 /* Calculate the return value, which is the maximum ovector value. */
2010 if (topbracket > 1)
2011 {
2012 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2013 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2014
2015 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2016 loop = LABEL();
2017 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2018 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2019 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2020 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2021 }
2022 else
2023 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2024 }
2025
2026 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2027 {
2028 DEFINE_COMPILER;
2029 struct sljit_jump *jump;
2030
2031 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2032 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2033 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2034
2035 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2036 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2037 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2038 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2039
2040 /* Store match begin and end. */
2041 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2042 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2043
2044 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2045 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2046 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2047 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2048 #endif
2049 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2050 JUMPHERE(jump);
2051
2052 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2053 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2054 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2055 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2056 #endif
2057 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2058
2059 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2060 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2061 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2062 #endif
2063 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2064
2065 JUMPTO(SLJIT_JUMP, quit);
2066 }
2067
2068 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2069 {
2070 /* May destroy TMP1. */
2071 DEFINE_COMPILER;
2072 struct sljit_jump *jump;
2073
2074 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2075 {
2076 /* The value of -1 must be kept for start_used_ptr! */
2077 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2078 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2079 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2080 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2081 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2082 JUMPHERE(jump);
2083 }
2084 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2085 {
2086 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2087 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2088 JUMPHERE(jump);
2089 }
2090 }
2091
2092 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2093 {
2094 /* Detects if the character has an othercase. */
2095 unsigned int c;
2096
2097 #ifdef SUPPORT_UTF
2098 if (common->utf)
2099 {
2100 GETCHAR(c, cc);
2101 if (c > 127)
2102 {
2103 #ifdef SUPPORT_UCP
2104 return c != UCD_OTHERCASE(c);
2105 #else
2106 return FALSE;
2107 #endif
2108 }
2109 #ifndef COMPILE_PCRE8
2110 return common->fcc[c] != c;
2111 #endif
2112 }
2113 else
2114 #endif
2115 c = *cc;
2116 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2117 }
2118
2119 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2120 {
2121 /* Returns with the othercase. */
2122 #ifdef SUPPORT_UTF
2123 if (common->utf && c > 127)
2124 {
2125 #ifdef SUPPORT_UCP
2126 return UCD_OTHERCASE(c);
2127 #else
2128 return c;
2129 #endif
2130 }
2131 #endif
2132 return TABLE_GET(c, common->fcc, c);
2133 }
2134
2135 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2136 {
2137 /* Detects if the character and its othercase has only 1 bit difference. */
2138 unsigned int c, oc, bit;
2139 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2140 int n;
2141 #endif
2142
2143 #ifdef SUPPORT_UTF
2144 if (common->utf)
2145 {
2146 GETCHAR(c, cc);
2147 if (c <= 127)
2148 oc = common->fcc[c];
2149 else
2150 {
2151 #ifdef SUPPORT_UCP
2152 oc = UCD_OTHERCASE(c);
2153 #else
2154 oc = c;
2155 #endif
2156 }
2157 }
2158 else
2159 {
2160 c = *cc;
2161 oc = TABLE_GET(c, common->fcc, c);
2162 }
2163 #else
2164 c = *cc;
2165 oc = TABLE_GET(c, common->fcc, c);
2166 #endif
2167
2168 SLJIT_ASSERT(c != oc);
2169
2170 bit = c ^ oc;
2171 /* Optimized for English alphabet. */
2172 if (c <= 127 && bit == 0x20)
2173 return (0 << 8) | 0x20;
2174
2175 /* Since c != oc, they must have at least 1 bit difference. */
2176 if (!is_powerof2(bit))
2177 return 0;
2178
2179 #if defined COMPILE_PCRE8
2180
2181 #ifdef SUPPORT_UTF
2182 if (common->utf && c > 127)
2183 {
2184 n = GET_EXTRALEN(*cc);
2185 while ((bit & 0x3f) == 0)
2186 {
2187 n--;
2188 bit >>= 6;
2189 }
2190 return (n << 8) | bit;
2191 }
2192 #endif /* SUPPORT_UTF */
2193 return (0 << 8) | bit;
2194
2195 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2196
2197 #ifdef SUPPORT_UTF
2198 if (common->utf && c > 65535)
2199 {
2200 if (bit >= (1 << 10))
2201 bit >>= 10;
2202 else
2203 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2204 }
2205 #endif /* SUPPORT_UTF */
2206 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2207
2208 #endif /* COMPILE_PCRE[8|16|32] */
2209 }
2210
2211 static void check_partial(compiler_common *common, BOOL force)
2212 {
2213 /* Checks whether a partial matching is occured. Does not modify registers. */
2214 DEFINE_COMPILER;
2215 struct sljit_jump *jump = NULL;
2216
2217 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2218
2219 if (common->mode == JIT_COMPILE)
2220 return;
2221
2222 if (!force)
2223 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2224 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2225 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2226
2227 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2228 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2229 else
2230 {
2231 if (common->partialmatchlabel != NULL)
2232 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2233 else
2234 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2235 }
2236
2237 if (jump != NULL)
2238 JUMPHERE(jump);
2239 }
2240
2241 static void check_str_end(compiler_common *common, jump_list **end_reached)
2242 {
2243 /* Does not affect registers. Usually used in a tight spot. */
2244 DEFINE_COMPILER;
2245 struct sljit_jump *jump;
2246
2247 if (common->mode == JIT_COMPILE)
2248 {
2249 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2250 return;
2251 }
2252
2253 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2254 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2255 {
2256 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2257 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2258 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2259 }
2260 else
2261 {
2262 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2263 if (common->partialmatchlabel != NULL)
2264 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2265 else
2266 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2267 }
2268 JUMPHERE(jump);
2269 }
2270
2271 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2272 {
2273 DEFINE_COMPILER;
2274 struct sljit_jump *jump;
2275
2276 if (common->mode == JIT_COMPILE)
2277 {
2278 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2279 return;
2280 }
2281
2282 /* Partial matching mode. */
2283 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2284 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2285 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2286 {
2287 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2288 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2289 }
2290 else
2291 {
2292 if (common->partialmatchlabel != NULL)
2293 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2294 else
2295 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2296 }
2297 JUMPHERE(jump);
2298 }
2299
2300 static void read_char(compiler_common *common)
2301 {
2302 /* Reads the character into TMP1, updates STR_PTR.
2303 Does not check STR_END. TMP2 Destroyed. */
2304 DEFINE_COMPILER;
2305 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2306 struct sljit_jump *jump;
2307 #endif
2308
2309 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2310 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2311 if (common->utf)
2312 {
2313 #if defined COMPILE_PCRE8
2314 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2315 #elif defined COMPILE_PCRE16
2316 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2317 #endif /* COMPILE_PCRE[8|16] */
2318 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2319 JUMPHERE(jump);
2320 }
2321 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2322 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2323 }
2324
2325 static void peek_char(compiler_common *common)
2326 {
2327 /* Reads the character into TMP1, keeps STR_PTR.
2328 Does not check STR_END. TMP2 Destroyed. */
2329 DEFINE_COMPILER;
2330 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2331 struct sljit_jump *jump;
2332 #endif
2333
2334 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2335 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2336 if (common->utf)
2337 {
2338 #if defined COMPILE_PCRE8
2339 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2340 #elif defined COMPILE_PCRE16
2341 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2342 #endif /* COMPILE_PCRE[8|16] */
2343 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2344 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2345 JUMPHERE(jump);
2346 }
2347 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2348 }
2349
2350 static void read_char8_type(compiler_common *common)
2351 {
2352 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2353 DEFINE_COMPILER;
2354 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2355 struct sljit_jump *jump;
2356 #endif
2357
2358 #ifdef SUPPORT_UTF
2359 if (common->utf)
2360 {
2361 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2362 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2363 #if defined COMPILE_PCRE8
2364 /* This can be an extra read in some situations, but hopefully
2365 it is needed in most cases. */
2366 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2367 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2368 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2369 JUMPHERE(jump);
2370 #elif defined COMPILE_PCRE16
2371 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2372 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2373 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2374 JUMPHERE(jump);
2375 /* Skip low surrogate if necessary. */
2376 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2377 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2378 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2379 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2380 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2381 #elif defined COMPILE_PCRE32
2382 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2383 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2384 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2385 JUMPHERE(jump);
2386 #endif /* COMPILE_PCRE[8|16|32] */
2387 return;
2388 }
2389 #endif /* SUPPORT_UTF */
2390 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2391 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2392 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2393 /* The ctypes array contains only 256 values. */
2394 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2395 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2396 #endif
2397 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2398 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2399 JUMPHERE(jump);
2400 #endif
2401 }
2402
2403 static void skip_char_back(compiler_common *common)
2404 {
2405 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2406 DEFINE_COMPILER;
2407 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2408 #if defined COMPILE_PCRE8
2409 struct sljit_label *label;
2410
2411 if (common->utf)
2412 {
2413 label = LABEL();
2414 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2415 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2416 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2417 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2418 return;
2419 }
2420 #elif defined COMPILE_PCRE16
2421 if (common->utf)
2422 {
2423 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2424 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2425 /* Skip low surrogate if necessary. */
2426 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2427 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2428 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2429 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2430 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2431 return;
2432 }
2433 #endif /* COMPILE_PCRE[8|16] */
2434 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2435 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2436 }
2437
2438 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2439 {
2440 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2441 DEFINE_COMPILER;
2442
2443 if (nltype == NLTYPE_ANY)
2444 {
2445 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2446 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2447 }
2448 else if (nltype == NLTYPE_ANYCRLF)
2449 {
2450 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2451 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2452 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2453 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2454 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2455 }
2456 else
2457 {
2458 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2459 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2460 }
2461 }
2462
2463 #ifdef SUPPORT_UTF
2464
2465 #if defined COMPILE_PCRE8
2466 static void do_utfreadchar(compiler_common *common)
2467 {
2468 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2469 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2470 DEFINE_COMPILER;
2471 struct sljit_jump *jump;
2472
2473 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2474 /* Searching for the first zero. */
2475 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2476 jump = JUMP(SLJIT_C_NOT_ZERO);
2477 /* Two byte sequence. */
2478 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2479 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2480 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2481 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2482 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2483 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2484 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2485 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2486 JUMPHERE(jump);
2487
2488 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2489 jump = JUMP(SLJIT_C_NOT_ZERO);
2490 /* Three byte sequence. */
2491 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2492 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2493 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2494 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2495 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2496 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2497 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2498 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2499 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2500 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2501 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2502 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2503 JUMPHERE(jump);
2504
2505 /* Four byte sequence. */
2506 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2507 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2508 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2509 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2510 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2511 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2512 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2513 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2514 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2515 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2516 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2517 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2518 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2519 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2520 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2521 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2522 }
2523
2524 static void do_utfreadtype8(compiler_common *common)
2525 {
2526 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2527 of the character (>= 0xc0). Return value in TMP1. */
2528 DEFINE_COMPILER;
2529 struct sljit_jump *jump;
2530 struct sljit_jump *compare;
2531
2532 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2533
2534 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2535 jump = JUMP(SLJIT_C_NOT_ZERO);
2536 /* Two byte sequence. */
2537 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2538 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2539 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2540 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2541 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2542 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2543 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2544 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2545 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2546
2547 JUMPHERE(compare);
2548 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2549 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2550 JUMPHERE(jump);
2551
2552 /* We only have types for characters less than 256. */
2553 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2554 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2555 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2556 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2557 }
2558
2559 #elif defined COMPILE_PCRE16
2560
2561 static void do_utfreadchar(compiler_common *common)
2562 {
2563 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2564 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2565 DEFINE_COMPILER;
2566 struct sljit_jump *jump;
2567
2568 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2569 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2570 /* Do nothing, only return. */
2571 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2572
2573 JUMPHERE(jump);
2574 /* Combine two 16 bit characters. */
2575 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2576 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2577 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2578 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2579 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2580 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2581 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2582 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2583 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2584 }
2585
2586 #endif /* COMPILE_PCRE[8|16] */
2587
2588 #endif /* SUPPORT_UTF */
2589
2590 #ifdef SUPPORT_UCP
2591
2592 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2593 #define UCD_BLOCK_MASK 127
2594 #define UCD_BLOCK_SHIFT 7
2595
2596 static void do_getucd(compiler_common *common)
2597 {
2598 /* Search the UCD record for the character comes in TMP1.
2599 Returns chartype in TMP1 and UCD offset in TMP2. */
2600 DEFINE_COMPILER;
2601
2602 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2603
2604 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2605 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2606 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2607 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2608 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2609 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2610 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2611 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2612 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2613 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2614 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2615 }
2616 #endif
2617
2618 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2619 {
2620 DEFINE_COMPILER;
2621 struct sljit_label *mainloop;
2622 struct sljit_label *newlinelabel = NULL;
2623 struct sljit_jump *start;
2624 struct sljit_jump *end = NULL;
2625 struct sljit_jump *nl = NULL;
2626 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2627 struct sljit_jump *singlechar;
2628 #endif
2629 jump_list *newline = NULL;
2630 BOOL newlinecheck = FALSE;
2631 BOOL readuchar = FALSE;
2632
2633 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2634 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2635 newlinecheck = TRUE;
2636
2637 if (firstline)
2638 {
2639 /* Search for the end of the first line. */
2640 SLJIT_ASSERT(common->first_line_end != 0);
2641 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2642
2643 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2644 {
2645 mainloop = LABEL();
2646 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2647 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2648 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2649 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2650 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2651 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2652 JUMPHERE(end);
2653 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2654 }
2655 else
2656 {
2657 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2658 mainloop = LABEL();
2659 /* Continual stores does not cause data dependency. */
2660 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2661 read_char(common);
2662 check_newlinechar(common, common->nltype, &newline, TRUE);
2663 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2664 JUMPHERE(end);
2665 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2666 set_jumps(newline, LABEL());
2667 }
2668
2669 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2670 }
2671
2672 start = JUMP(SLJIT_JUMP);
2673
2674 if (newlinecheck)
2675 {
2676 newlinelabel = LABEL();
2677 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2678 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2679 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2680 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2681 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2682 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2683 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2684 #endif
2685 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2686 nl = JUMP(SLJIT_JUMP);
2687 }
2688
2689 mainloop = LABEL();
2690
2691 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2692 #ifdef SUPPORT_UTF
2693 if (common->utf) readuchar = TRUE;
2694 #endif
2695 if (newlinecheck) readuchar = TRUE;
2696
2697 if (readuchar)
2698 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2699
2700 if (newlinecheck)
2701 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2702
2703 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2704 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2705 #if defined COMPILE_PCRE8
2706 if (common->utf)
2707 {
2708 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2709 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2710 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2711 JUMPHERE(singlechar);
2712 }
2713 #elif defined COMPILE_PCRE16
2714 if (common->utf)
2715 {
2716 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2717 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2718 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2719 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2720 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2721 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2722 JUMPHERE(singlechar);
2723 }
2724 #endif /* COMPILE_PCRE[8|16] */
2725 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2726 JUMPHERE(start);
2727
2728 if (newlinecheck)
2729 {
2730 JUMPHERE(end);
2731 JUMPHERE(nl);
2732 }
2733
2734 return mainloop;
2735 }
2736
2737 #define MAX_N_CHARS 3
2738
2739 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2740 {
2741 DEFINE_COMPILER;
2742 struct sljit_label *start;
2743 struct sljit_jump *quit;
2744 pcre_uint32 chars[MAX_N_CHARS * 2];
2745 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2746 int location = 0;
2747 pcre_int32 len, c, bit, caseless;
2748 int must_stop;
2749
2750 /* We do not support alternatives now. */
2751 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2752 return FALSE;
2753
2754 while (TRUE)
2755 {
2756 caseless = 0;
2757 must_stop = 1;
2758 switch(*cc)
2759 {
2760 case OP_CHAR:
2761 must_stop = 0;
2762 cc++;
2763 break;
2764
2765 case OP_CHARI:
2766 caseless = 1;
2767 must_stop = 0;
2768 cc++;
2769 break;
2770
2771 case OP_SOD:
2772 case OP_SOM:
2773 case OP_SET_SOM:
2774 case OP_NOT_WORD_BOUNDARY:
2775 case OP_WORD_BOUNDARY:
2776 case OP_EODN:
2777 case OP_EOD:
2778 case OP_CIRC:
2779 case OP_CIRCM:
2780 case OP_DOLL:
2781 case OP_DOLLM:
2782 /* Zero width assertions. */
2783 cc++;
2784 continue;
2785
2786 case OP_PLUS:
2787 case OP_MINPLUS:
2788 case OP_POSPLUS:
2789 cc++;
2790 break;
2791
2792 case OP_EXACT:
2793 cc += 1 + IMM2_SIZE;
2794 break;
2795
2796 case OP_PLUSI:
2797 case OP_MINPLUSI:
2798 case OP_POSPLUSI:
2799 caseless = 1;
2800 cc++;
2801 break;
2802
2803 case OP_EXACTI:
2804 caseless = 1;
2805 cc += 1 + IMM2_SIZE;
2806 break;
2807
2808 default:
2809 must_stop = 2;
2810 break;
2811 }
2812
2813 if (must_stop == 2)
2814 break;
2815
2816 len = 1;
2817 #ifdef SUPPORT_UTF
2818 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2819 #endif
2820
2821 if (caseless && char_has_othercase(common, cc))
2822 {
2823 caseless = char_get_othercase_bit(common, cc);
2824 if (caseless == 0)
2825 return FALSE;
2826 #ifdef COMPILE_PCRE8
2827 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2828 #else
2829 if ((caseless & 0x100) != 0)
2830 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2831 else
2832 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2833 #endif
2834 }
2835 else
2836 caseless = 0;
2837
2838 while (len > 0 && location < MAX_N_CHARS * 2)
2839 {
2840 c = *cc;
2841 bit = 0;
2842 if (len == (caseless & 0xff))
2843 {
2844 bit = caseless >> 8;
2845 c |= bit;
2846 }
2847
2848 chars[location] = c;
2849 chars[location + 1] = bit;
2850
2851 len--;
2852 location += 2;
2853 cc++;
2854 }
2855
2856 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
2857 break;
2858 }
2859
2860 /* At least two characters are required. */
2861 if (location < 2 * 2)
2862 return FALSE;
2863
2864 if (firstline)
2865 {
2866 SLJIT_ASSERT(common->first_line_end != 0);
2867 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2868 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2869 }
2870 else
2871 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2872
2873 start = LABEL();
2874 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2875
2876 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2877 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2878 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2879 if (chars[1] != 0)
2880 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
2881 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
2882 if (location > 2 * 2)
2883 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2884 if (chars[3] != 0)
2885 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
2886 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
2887 if (location > 2 * 2)
2888 {
2889 if (chars[5] != 0)
2890 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
2891 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
2892 }
2893 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2894
2895 JUMPHERE(quit);
2896
2897 if (firstline)
2898 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2899 else
2900 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2901 return TRUE;
2902 }
2903
2904 #undef MAX_N_CHARS
2905
2906 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2907 {
2908 DEFINE_COMPILER;
2909 struct sljit_label *start;
2910 struct sljit_jump *quit;
2911 struct sljit_jump *found;
2912 pcre_uchar oc, bit;
2913
2914 if (firstline)
2915 {
2916 SLJIT_ASSERT(common->first_line_end != 0);
2917 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2918 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2919 }
2920
2921 start = LABEL();
2922 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2923 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2924
2925 oc = first_char;
2926 if (caseless)
2927 {
2928 oc = TABLE_GET(first_char, common->fcc, first_char);
2929 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2930 if (first_char > 127 && common->utf)
2931 oc = UCD_OTHERCASE(first_char);
2932 #endif
2933 }
2934 if (first_char == oc)
2935 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2936 else
2937 {
2938 bit = first_char ^ oc;
2939 if (is_powerof2(bit))
2940 {
2941 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2942 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2943 }
2944 else
2945 {
2946 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2947 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2948 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2949 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2950 found = JUMP(SLJIT_C_NOT_ZERO);
2951 }
2952 }
2953
2954 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2955 JUMPTO(SLJIT_JUMP, start);
2956 JUMPHERE(found);
2957 JUMPHERE(quit);
2958
2959 if (firstline)
2960 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2961 }
2962
2963 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2964 {
2965 DEFINE_COMPILER;
2966 struct sljit_label *loop;
2967 struct sljit_jump *lastchar;
2968 struct sljit_jump *firstchar;
2969 struct sljit_jump *quit;
2970 struct sljit_jump *foundcr = NULL;
2971 struct sljit_jump *notfoundnl;
2972 jump_list *newline = NULL;
2973
2974 if (firstline)
2975 {
2976 SLJIT_ASSERT(common->first_line_end != 0);
2977 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2978 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2979 }
2980
2981 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2982 {
2983 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2984 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2985 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2986 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2987 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2988
2989 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2990 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2991 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
2992 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2993 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
2994 #endif
2995 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2996
2997 loop = LABEL();
2998 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2999 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3000 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3001 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3002 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3003 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3004
3005 JUMPHERE(quit);
3006 JUMPHERE(firstchar);
3007 JUMPHERE(lastchar);
3008
3009 if (firstline)
3010 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3011 return;
3012 }
3013
3014 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3015 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3016 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3017 skip_char_back(common);
3018
3019 loop = LABEL();
3020 read_char(common);
3021 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3022 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3023 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3024 check_newlinechar(common, common->nltype, &newline, FALSE);
3025 set_jumps(newline, loop);
3026
3027 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3028 {
3029 quit = JUMP(SLJIT_JUMP);
3030 JUMPHERE(foundcr);
3031 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3032 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3033 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3034 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3035 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3036 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3037 #endif
3038 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3039 JUMPHERE(notfoundnl);
3040 JUMPHERE(quit);
3041 }
3042 JUMPHERE(lastchar);
3043 JUMPHERE(firstchar);
3044
3045 if (firstline)
3046 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3047 }
3048
3049 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
3050
3051 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
3052 {
3053 DEFINE_COMPILER;
3054 struct sljit_label *start;
3055 struct sljit_jump *quit;
3056 struct sljit_jump *found = NULL;
3057 jump_list *matches = NULL;
3058 pcre_uint8 inverted_start_bits[32];
3059 int i;
3060 #ifndef COMPILE_PCRE8
3061 struct sljit_jump *jump;
3062 #endif
3063
3064 for (i = 0; i < 32; ++i)
3065 inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3066
3067 if (firstline)
3068 {
3069 SLJIT_ASSERT(common->first_line_end != 0);
3070 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3071 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3072 }
3073
3074 start = LABEL();
3075 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3076 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3077 #ifdef SUPPORT_UTF
3078 if (common->utf)
3079 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3080 #endif
3081
3082 if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3083 {
3084 #ifndef COMPILE_PCRE8
3085 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3086 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3087 JUMPHERE(jump);
3088 #endif
3089 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3090 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3091 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3092 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3093 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3094 found = JUMP(SLJIT_C_NOT_ZERO);
3095 }
3096
3097 #ifdef SUPPORT_UTF
3098 if (common->utf)
3099 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3100 #endif
3101 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3102 #ifdef SUPPORT_UTF
3103 #if defined COMPILE_PCRE8
3104 if (common->utf)
3105 {
3106 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3107 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3108 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3109 }
3110 #elif defined COMPILE_PCRE16
3111 if (common->utf)
3112 {
3113 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3114 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3115 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3116 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3117 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3118 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3119 }
3120 #endif /* COMPILE_PCRE[8|16] */
3121 #endif /* SUPPORT_UTF */
3122 JUMPTO(SLJIT_JUMP, start);
3123 if (found != NULL)
3124 JUMPHERE(found);
3125 if (matches != NULL)
3126 set_jumps(matches, LABEL());
3127 JUMPHERE(quit);
3128
3129 if (firstline)
3130 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3131 }
3132
3133 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3134 {
3135 DEFINE_COMPILER;
3136 struct sljit_label *loop;
3137 struct sljit_jump *toolong;
3138 struct sljit_jump *alreadyfound;
3139 struct sljit_jump *found;
3140 struct sljit_jump *foundoc = NULL;
3141 struct sljit_jump *notfound;
3142 pcre_uint32 oc, bit;
3143
3144 SLJIT_ASSERT(common->req_char_ptr != 0);
3145 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3146 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3147 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3148 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3149
3150 if (has_firstchar)
3151 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3152 else
3153 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3154
3155 loop = LABEL();
3156 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3157
3158 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3159 oc = req_char;
3160 if (caseless)
3161 {
3162 oc = TABLE_GET(req_char, common->fcc, req_char);
3163 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3164 if (req_char > 127 && common->utf)
3165 oc = UCD_OTHERCASE(req_char);
3166 #endif
3167 }
3168 if (req_char == oc)
3169 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3170 else
3171 {
3172 bit = req_char ^ oc;
3173 if (is_powerof2(bit))
3174 {
3175 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3176 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3177 }
3178 else
3179 {
3180 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3181 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3182 }
3183 }
3184 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3185 JUMPTO(SLJIT_JUMP, loop);
3186
3187 JUMPHERE(found);
3188 if (foundoc)
3189 JUMPHERE(foundoc);
3190 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3191 JUMPHERE(alreadyfound);
3192 JUMPHERE(toolong);
3193 return notfound;
3194 }
3195
3196 static void do_revertframes(compiler_common *common)
3197 {
3198 DEFINE_COMPILER;
3199 struct sljit_jump *jump;
3200 struct sljit_label *mainloop;
3201
3202 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3203 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3204 GET_LOCAL_BASE(TMP3, 0, 0);
3205
3206 /* Drop frames until we reach STACK_TOP. */
3207 mainloop = LABEL();
3208 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3209 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3210 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3211
3212 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3213 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3214 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3215 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3216 JUMPTO(SLJIT_JUMP, mainloop);
3217
3218 JUMPHERE(jump);
3219 jump = JUMP(SLJIT_C_SIG_LESS);
3220 /* End of dropping frames. */
3221 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3222
3223 JUMPHERE(jump);
3224 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3225 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3226 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3227 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3228 JUMPTO(SLJIT_JUMP, mainloop);
3229 }
3230
3231 static void check_wordboundary(compiler_common *common)
3232 {
3233 DEFINE_COMPILER;
3234 struct sljit_jump *skipread;
3235 jump_list *skipread_list = NULL;
3236 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3237 struct sljit_jump *jump;
3238 #endif
3239
3240 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3241
3242 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3243 /* Get type of the previous char, and put it to LOCALS1. */
3244 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3245 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3246 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3247 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3248 skip_char_back(common);
3249 check_start_used_ptr(common);
3250 read_char(common);
3251
3252 /* Testing char type. */
3253 #ifdef SUPPORT_UCP
3254 if (common->use_ucp)
3255 {
3256 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3257 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3258 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3259 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3260 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3261 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3262 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3263 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3264 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3265 JUMPHERE(jump);
3266 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3267 }
3268 else
3269 #endif
3270 {
3271 #ifndef COMPILE_PCRE8
3272 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3273 #elif defined SUPPORT_UTF
3274 /* Here LOCALS1 has already been zeroed. */
3275 jump = NULL;
3276 if (common->utf)
3277 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3278 #endif /* COMPILE_PCRE8 */
3279 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3280 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3281 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3282 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3283 #ifndef COMPILE_PCRE8
3284 JUMPHERE(jump);
3285 #elif defined SUPPORT_UTF
3286 if (jump != NULL)
3287 JUMPHERE(jump);
3288 #endif /* COMPILE_PCRE8 */
3289 }
3290 JUMPHERE(skipread);
3291
3292 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3293 check_str_end(common, &skipread_list);
3294 peek_char(common);
3295
3296 /* Testing char type. This is a code duplication. */
3297 #ifdef SUPPORT_UCP
3298 if (common->use_ucp)
3299 {
3300 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3301 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3302 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3303 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3304 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3305 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3306 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3307 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3308 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3309 JUMPHERE(jump);
3310 }
3311 else
3312 #endif
3313 {
3314 #ifndef COMPILE_PCRE8
3315 /* TMP2 may be destroyed by peek_char. */
3316 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3317 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3318 #elif defined SUPPORT_UTF
3319 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3320 jump = NULL;
3321 if (common->utf)
3322 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3323 #endif
3324 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3325 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3326 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3327 #ifndef COMPILE_PCRE8
3328 JUMPHERE(jump);
3329 #elif defined SUPPORT_UTF
3330 if (jump != NULL)
3331 JUMPHERE(jump);
3332 #endif /* COMPILE_PCRE8 */
3333 }
3334 set_jumps(skipread_list, LABEL());
3335
3336 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3337 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3338 }
3339
3340 /*
3341 range format:
3342
3343 ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3344 ranges[1] = first bit (0 or 1)
3345 ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)
3346 */
3347
3348 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3349 {
3350 DEFINE_COMPILER;
3351 struct sljit_jump *jump;
3352
3353 if (ranges[0] < 0)
3354 return FALSE;
3355
3356 switch(ranges[0])
3357 {
3358 case 1:
3359 if (readch)
3360 read_char(common);
3361 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3362 return TRUE;
3363
3364 case 2:
3365 if (readch)
3366 read_char(common);
3367 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3368 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3369 return TRUE;
3370
3371 case 4:
3372 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3373 {
3374 if (readch)
3375 read_char(common);
3376 if (ranges[1] != 0)
3377 {
3378 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3379 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3380 }
3381 else
3382 {
3383 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3384 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3385 JUMPHERE(jump);
3386 }
3387 return TRUE;
3388 }
3389 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3390 {
3391 if (readch)
3392 read_char(common);
3393 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3394 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3395 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3396 return TRUE;
3397 }
3398 return FALSE;
3399
3400 default:
3401 return FALSE;
3402 }
3403 }
3404
3405 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3406 {
3407 int i, bit, length;
3408 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3409
3410 bit = ctypes[0] & flag;
3411 ranges[0] = -1;
3412 ranges[1] = bit != 0 ? 1 : 0;
3413 length = 0;
3414
3415 for (i = 1; i < 256; i++)
3416 if ((ctypes[i] & flag) != bit)
3417 {
3418 if (length >= MAX_RANGE_SIZE)
3419 return;
3420 ranges[2 + length] = i;
3421 length++;
3422 bit ^= flag;
3423 }
3424
3425 if (bit != 0)
3426 {
3427 if (length >= MAX_RANGE_SIZE)
3428 return;
3429 ranges[2 + length] = 256;
3430 length++;
3431 }
3432 ranges[0] = length;
3433 }
3434
3435 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3436 {
3437 int ranges[2 + MAX_RANGE_SIZE];
3438 pcre_uint8 bit, cbit, all;
3439 int i, byte, length = 0;
3440
3441 bit = bits[0] & 0x1;
3442 ranges[1] = bit;
3443 /* Can be 0 or 255. */
3444 all = -bit;
3445
3446 for (i = 0; i < 256; )
3447 {
3448 byte = i >> 3;
3449 if ((i & 0x7) == 0 && bits[byte] == all)
3450 i += 8;
3451 else
3452 {
3453 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3454 if (cbit != bit)
3455 {
3456 if (length >= MAX_RANGE_SIZE)
3457 return FALSE;
3458 ranges[2 + length] = i;
3459 length++;
3460 bit = cbit;
3461 all = -cbit;
3462 }
3463 i++;
3464 }
3465 }
3466
3467 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3468 {
3469 if (length >= MAX_RANGE_SIZE)
3470 return FALSE;
3471 ranges[2 + length] = 256;
3472 length++;
3473 }
3474 ranges[0] = length;
3475
3476 return check_ranges(common, ranges, backtracks, FALSE);
3477 }
3478
3479 static void check_anynewline(compiler_common *common)
3480 {
3481 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3482 DEFINE_COMPILER;
3483
3484 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3485
3486 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3487 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3488 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3489 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3490 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3491 #ifdef COMPILE_PCRE8
3492 if (common->utf)
3493 {
3494 #endif
3495 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3496 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3497 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3498 #ifdef COMPILE_PCRE8
3499 }
3500 #endif
3501 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3502 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3503 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3504 }
3505
3506 static void check_hspace(compiler_common *common)
3507 {
3508 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3509 DEFINE_COMPILER;
3510
3511 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3512
3513 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3514 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3515 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3516 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3517 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3518 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3519 #ifdef COMPILE_PCRE8
3520 if (common->utf)
3521 {
3522 #endif
3523 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3524 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3525 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3526 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3527 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3528 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3529 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3530 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3531 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3532 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3533 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3534 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3535 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3536 #ifdef COMPILE_PCRE8
3537 }
3538 #endif
3539 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3540 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3541
3542 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3543 }
3544
3545 static void check_vspace(compiler_common *common)
3546 {
3547 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3548 DEFINE_COMPILER;
3549
3550 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3551
3552 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3553 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3554 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3555 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3556 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3557 #ifdef COMPILE_PCRE8
3558 if (common->utf)
3559 {
3560 #endif
3561 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3562 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3563 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3564 #ifdef COMPILE_PCRE8
3565 }
3566 #endif
3567 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3568 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3569
3570 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3571 }
3572
3573 #define CHAR1 STR_END
3574 #define CHAR2 STACK_TOP
3575
3576 static void do_casefulcmp(compiler_common *common)
3577 {
3578 DEFINE_COMPILER;
3579 struct sljit_jump *jump;
3580 struct sljit_label *label;
3581
3582 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3583 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3584 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3585 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
3586 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3587 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3588
3589 label = LABEL();
3590 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3591 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3592 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3593 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3594 JUMPTO(SLJIT_C_NOT_ZERO, label);
3595
3596 JUMPHERE(jump);
3597 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3598 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3599 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3600 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3601 }
3602
3603 #define LCC_TABLE STACK_LIMIT
3604
3605 static void do_caselesscmp(compiler_common *common)
3606 {
3607 DEFINE_COMPILER;
3608 struct sljit_jump *jump;
3609 struct sljit_label *label;
3610
3611 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3612 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3613
3614 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3615 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3616 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3617 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
3618 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3619 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3620
3621 label = LABEL();
3622 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3623 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3624 #ifndef COMPILE_PCRE8
3625 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3626 #endif
3627 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3628 #ifndef COMPILE_PCRE8
3629 JUMPHERE(jump);
3630 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3631 #endif
3632 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3633 #ifndef COMPILE_PCRE8
3634 JUMPHERE(jump);
3635 #endif
3636 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3637 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3638 JUMPTO(SLJIT_C_NOT_ZERO, label);
3639
3640 JUMPHERE(jump);
3641 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3642 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3643 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3644 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3645 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3646 }
3647
3648 #undef LCC_TABLE
3649 #undef CHAR1
3650 #undef CHAR2
3651
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of two UTF character sequences, done in plain C because
it would be ineffective to do in JIT level.

  src1, end1  the first sequence
  args        supplies the second sequence: it starts at args->uchar_ptr and
              must not run past args->end

Returns the position reached in the second sequence when all of the first
sequence matched, NULL on a mismatch, and the marker value (pcre_uchar*)1
when the second sequence ran out first. */
static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
const pcre_uchar *src2 = args->uchar_ptr;
const pcre_uchar *end2 = args->end;
const ucd_record *record;
const pcre_uint32 *set;
pcre_uint32 c1, c2;

while (src1 < end1)
  {
  if (src2 >= end2)
    return (pcre_uchar*)1;

  GETCHARINC(c1, src1);
  GETCHARINC(c2, src2);
  record = GET_UCD(c2);

  /* Same character, or c1 is the simple other case of c2. */
  if (c1 == c2 || c1 == c2 + record->other_case)
    continue;

  /* Otherwise c1 must appear in the caseless set of c2. The scan gives up
  as soon as an entry greater than c1 is seen. */
  for (set = PRIV(ucd_caseless_sets) + record->caseset; ; set++)
    {
    if (c1 < *set)
      return NULL;
    if (c1 == *set)
      break;
    }
  }
return src2;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
3684
3685 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3686 compare_context* context, jump_list **backtracks)
3687 {
3688 DEFINE_COMPILER;
3689 unsigned int othercasebit = 0;
3690 pcre_uchar *othercasechar = NULL;
3691 #ifdef SUPPORT_UTF
3692 int utflength;
3693 #endif
3694
3695 if (caseless && char_has_othercase(common, cc))
3696 {
3697 othercasebit = char_get_othercase_bit(common, cc);
3698 SLJIT_ASSERT(othercasebit);
3699 /* Extracting bit difference info. */
3700 #if defined COMPILE_PCRE8
3701 othercasechar = cc + (othercasebit >> 8);
3702 othercasebit &= 0xff;
3703 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3704 /* Note that this code only handles characters in the BMP. If there
3705 ever are characters outside the BMP whose othercase differs in only one
3706 bit from itself (there currently are none), this code will need to be
3707 revised for COMPILE_PCRE32. */
3708 othercasechar = cc + (othercasebit >> 9);
3709 if ((othercasebit & 0x100) != 0)
3710 othercasebit = (othercasebit & 0xff) << 8;
3711 else
3712 othercasebit &= 0xff;
3713 #endif /* COMPILE_PCRE[8|16|32] */
3714 }
3715
3716 if (context->sourcereg == -1)
3717 {
3718 #if defined COMPILE_PCRE8
3719 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3720 if (context->length >= 4)
3721 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3722 else if (context->length >= 2)
3723 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3724 else
3725 #endif
3726 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3727 #elif defined COMPILE_PCRE16
3728 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3729 if (context->length >= 4)
3730 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3731 else
3732 #endif
3733 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3734 #elif defined COMPILE_PCRE32
3735 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3736 #endif /* COMPILE_PCRE[8|16|32] */
3737 context->sourcereg = TMP2;
3738 }
3739
3740 #ifdef SUPPORT_UTF
3741 utflength = 1;
3742 if (common->utf && HAS_EXTRALEN(*cc))
3743 utflength += GET_EXTRALEN(*cc);
3744
3745 do
3746 {
3747 #endif
3748
3749 context->length -= IN_UCHARS(1);
3750 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3751
3752 /* Unaligned read is supported. */
3753 if (othercasebit != 0 && othercasechar == cc)
3754 {
3755 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3756 context->oc.asuchars[context->ucharptr] = othercasebit;
3757 }
3758 else
3759 {
3760 context->c.asuchars[context->ucharptr] = *cc;
3761 context->oc.asuchars[context->ucharptr] = 0;
3762 }
3763 context->ucharptr++;
3764
3765 #if defined COMPILE_PCRE8
3766 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3767 #else
3768 if (context->ucharptr >= 2 || context->length == 0)
3769 #endif
3770 {
3771 if (context->length >= 4)
3772 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3773 else if (context->length >= 2)
3774 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3775 #if defined COMPILE_PCRE8
3776 else if (context->length >= 1)
3777 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3778 #endif /* COMPILE_PCRE8 */
3779 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3780
3781 switch(context->ucharptr)
3782 {
3783 case 4 / sizeof(pcre_uchar):
3784 if (context->oc.asint != 0)
3785 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3786 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3787 break;
3788
3789 case 2 / sizeof(pcre_uchar):
3790 if (context->oc.asushort != 0)
3791 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3792 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3793 break;
3794
3795 #ifdef COMPILE_PCRE8
3796 case 1:
3797 if (context->oc.asbyte != 0)
3798 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3799 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3800 break;
3801 #endif
3802
3803 default:
3804 SLJIT_ASSERT_STOP();
3805 break;
3806 }
3807 context->ucharptr = 0;
3808 }
3809
3810 #else
3811
3812 /* Unaligned read is unsupported or in 32 bit mode. */
3813 if (context->length >= 1)
3814 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3815
3816 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3817
3818 if (othercasebit != 0 && othercasechar == cc)
3819 {
3820 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3821 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3822 }
3823 else
3824 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3825
3826 #endif
3827
3828 cc++;
3829 #ifdef SUPPORT_UTF
3830 utflength--;
3831 }
3832 while (utflength > 0);
3833 #endif
3834
3835 return cc;
3836 }
3837
3838 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3839
/* Rebase helpers. typereg / TMP1 hold (real value - current offset); these
macros emit the single ADD or SUB needed to move to a new offset and record
it in typeoffset / charoffset. Nothing is emitted when the offset is
unchanged.

Each macro expands to exactly one statement (do ... while (0)), so it behaves
as expected under an unbraced if/else and must be followed by a semicolon.
The value argument is evaluated more than once and must have no side
effects. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) > typeoffset) \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      else \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      } \
    typeoffset = (value); \
    } \
  while (0)

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) > charoffset) \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
      else \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
      } \
    charoffset = (value); \
    } \
  while (0)
3859
3860 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3861 {
3862 DEFINE_COMPILER;
3863 jump_list *found = NULL;
3864 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3865 pcre_int32 c, charoffset;
3866 const pcre_uint32 *other_cases;
3867 struct sljit_jump *jump = NULL;
3868 pcre_uchar *ccbegin;
3869 int compares, invertcmp, numberofcmps;
3870 #ifdef SUPPORT_UCP
3871 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3872 BOOL charsaved = FALSE;
3873 int typereg = TMP1, scriptreg = TMP1;
3874 pcre_int32 typeoffset;
3875 #endif
3876
3877 /* Although SUPPORT_UTF must be defined, we are
3878 not necessary in utf mode even in 8 bit mode. */
3879 detect_partial_match(common, backtracks);
3880 read_char(common);
3881
3882 if ((*cc++ & XCL_MAP) != 0)
3883 {
3884 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3885 #ifndef COMPILE_PCRE8
3886 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3887 #elif defined SUPPORT_UTF
3888 if (common->utf)
3889 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3890 #endif
3891
3892 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3893 {
3894 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3895 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3896 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
3897 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3898 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3899 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3900 }
3901
3902 #ifndef COMPILE_PCRE8
3903 JUMPHERE(jump);
3904 #elif defined SUPPORT_UTF
3905 if (common->utf)
3906 JUMPHERE(jump);
3907 #endif
3908 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3909 #ifdef SUPPORT_UCP
3910 charsaved = TRUE;
3911 #endif
3912 cc += 32 / sizeof(pcre_uchar);
3913 }
3914
3915 /* Scanning the necessary info. */
3916 ccbegin = cc;
3917 compares = 0;
3918 while (*cc != XCL_END)
3919 {
3920 compares++;
3921 if (*cc == XCL_SINGLE)
3922 {
3923 cc += 2;
3924 #ifdef SUPPORT_UTF
3925 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3926 #endif
3927 #ifdef SUPPORT_UCP
3928 needschar = TRUE;
3929 #endif
3930 }
3931 else if (*cc == XCL_RANGE)
3932 {
3933 cc += 2;
3934 #ifdef SUPPORT_UTF
3935 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3936 #endif
3937 cc++;
3938 #ifdef SUPPORT_UTF
3939 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3940 #endif
3941 #ifdef SUPPORT_UCP
3942 needschar = TRUE;
3943 #endif
3944 }
3945 #ifdef SUPPORT_UCP
3946 else
3947 {
3948 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3949 cc++;
3950 switch(*cc)
3951 {
3952 case PT_ANY:
3953 break;
3954
3955 case PT_LAMP:
3956 case PT_GC:
3957 case PT_PC:
3958 case PT_ALNUM:
3959 needstype = TRUE;
3960 break;
3961
3962 case PT_SC:
3963 needsscript = TRUE;
3964 break;
3965
3966 case PT_SPACE:
3967 case PT_PXSPACE:
3968 case PT_WORD:
3969 needstype = TRUE;
3970 needschar = TRUE;
3971 break;
3972
3973 case PT_CLIST:
3974 case PT_UCNC:
3975 needschar = TRUE;
3976 break;
3977
3978 default:
3979 SLJIT_ASSERT_STOP();
3980 break;
3981 }
3982 cc += 2;
3983 }
3984 #endif
3985 }
3986
3987 #ifdef SUPPORT_UCP
3988 /* Simple register allocation. TMP1 is preferred if possible. */
3989 if (needstype || needsscript)
3990 {
3991 if (needschar && !charsaved)
3992 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3993 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3994 if (needschar)
3995 {
3996 if (needstype)
3997 {
3998 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3999 typereg = RETURN_ADDR;
4000 }
4001
4002 if (needsscript)
4003 scriptreg = TMP3;
4004 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4005 }
4006 else if (needstype && needsscript)
4007 scriptreg = TMP3;
4008 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4009
4010 if (needsscript)
4011 {
4012 if (scriptreg == TMP1)
4013 {
4014 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4015 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4016 }
4017 else
4018 {
4019 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4020 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4021 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4022 }
4023 }
4024 }
4025 #endif
4026
4027 /* Generating code. */
4028 cc = ccbegin;
4029 charoffset = 0;
4030 numberofcmps = 0;
4031 #ifdef SUPPORT_UCP
4032 typeoffset = 0;
4033 #endif
4034
4035 while (*cc != XCL_END)
4036 {
4037 compares--;
4038 invertcmp = (compares == 0 && list != backtracks);
4039 jump = NULL;
4040
4041 if (*cc == XCL_SINGLE)
4042 {
4043 cc ++;
4044 #ifdef SUPPORT_UTF
4045 if (common->utf)
4046 {
4047 GETCHARINC(c, cc);
4048 }
4049 else
4050 #endif
4051 c = *cc++;
4052
4053 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4054 {
4055 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4056 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4057 numberofcmps++;
4058 }
4059 else if (numberofcmps > 0)
4060 {
4061 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4062 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4063 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4064 numberofcmps = 0;
4065 }
4066 else
4067 {
4068 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4069 numberofcmps = 0;
4070 }
4071 }
4072 else if (*cc == XCL_RANGE)
4073 {
4074 cc ++;
4075 #ifdef SUPPORT_UTF
4076 if (common->utf)
4077 {
4078 GETCHARINC(c, cc);
4079 }
4080 else
4081 #endif
4082 c = *cc++;
4083 SET_CHAR_OFFSET(c);
4084 #ifdef SUPPORT_UTF
4085 if (common->utf)
4086 {
4087 GETCHARINC(c, cc);
4088 }
4089 else
4090 #endif
4091 c = *cc++;
4092 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4093 {
4094 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4095 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4096 numberofcmps++;
4097 }
4098 else if (numberofcmps > 0)
4099 {
4100 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4101 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4102 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4103 numberofcmps = 0;
4104 }
4105 else
4106 {
4107 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4108 numberofcmps = 0;
4109 }
4110 }
4111 #ifdef SUPPORT_UCP
4112 else
4113 {
4114 if (*cc == XCL_NOTPROP)
4115 invertcmp ^= 0x1;
4116 cc++;
4117 switch(*cc)
4118 {
4119 case PT_ANY:
4120 if (list != backtracks)
4121 {
4122 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4123 continue;
4124 }
4125 else if (cc[-1] == XCL_NOTPROP)
4126 continue;
4127 jump = JUMP(SLJIT_JUMP);
4128 break;
4129
4130 case PT_LAMP:
4131 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4132 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4133 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4134 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4135 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4136 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4137 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4138 break;
4139
4140 case PT_GC:
4141 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4142 SET_TYPE_OFFSET(c);
4143 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4144 break;
4145
4146 case PT_PC:
4147 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4148 break;
4149
4150 case PT_SC:
4151 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4152 break;
4153
4154 case PT_SPACE:
4155 case PT_PXSPACE:
4156 if (*cc == PT_SPACE)
4157 {
4158 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4159 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4160 }
4161 SET_CHAR_OFFSET(9);
4162 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4163 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4164 if (*cc == PT_SPACE)
4165 JUMPHERE(jump);
4166
4167 SET_TYPE_OFFSET(ucp_Zl);
4168 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4169 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4170 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4171 break;
4172
4173 case PT_WORD:
4174 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4175 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4176 /* Fall through. */
4177
4178 case PT_ALNUM:
4179 SET_TYPE_OFFSET(ucp_Ll);
4180 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4181 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4182 SET_TYPE_OFFSET(ucp_Nd);
4183 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4184 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4185 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4186 break;
4187
4188 case PT_CLIST:
4189 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4190
4191 /* At least three characters are required.
4192 Otherwise this case would be handled by the normal code path. */
4193 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4194 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4195
4196 /* Optimizing character pairs, if their difference is power of 2. */
4197 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4198 {
4199 if (charoffset == 0)
4200 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4201 else
4202 {
4203 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4204 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4205 }
4206 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4207 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4208 other_cases += 2;
4209 }
4210 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4211 {
4212 if (charoffset == 0)
4213 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4214 else
4215 {
4216 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4217 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4218 }
4219 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4220 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4221
4222 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4223 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4224
4225 other_cases += 3;
4226 }
4227 else
4228 {
4229 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4230 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4231 }
4232
4233 while (*other_cases != NOTACHAR)
4234 {
4235 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4236 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4237 }
4238 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4239 break;
4240
4241 case PT_UCNC:
4242 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4243 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4244 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4245 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4246 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4247 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4248
4249 SET_CHAR_OFFSET(0xa0);
4250 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4251 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4252 SET_CHAR_OFFSET(0);
4253 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4254 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4255 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4256 break;
4257 }
4258 cc += 2;
4259 }
4260 #endif
4261
4262 if (jump != NULL)
4263 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4264 }
4265
4266 if (found != NULL)
4267 set_jumps(found, LABEL());
4268 }
4269
4270 #undef SET_TYPE_OFFSET
4271 #undef SET_CHAR_OFFSET
4272
4273 #endif
4274
/* Emits the matching-path machine code for a single pattern item of kind
`type`: position assertions, character types, single (possibly caseless or
negated) characters, bitmap and extended classes, and OP_REVERSE.

Arguments:
  common      compiler state (UTF mode, newline convention, JIT mode, shared
              routine jump lists such as wordboundary/hspace/vspace/getucd)
  type        the opcode to compile
  cc          points at the data following the opcode (the caller in
              compile_charn_matchingpath passes *cc and cc + 1)
  backtracks  list that receives every jump taken when the item fails

Returns:      the pattern pointer advanced past the data this opcode used.
              Opcodes without data return cc unchanged. An opcode that is not
              handled by the switch reaches SLJIT_ASSERT_STOP(). */
4275 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4276 {
4277 DEFINE_COMPILER;
4278 int length;
4279 unsigned int c, oc, bit;
4280 compare_context context;
4281 struct sljit_jump *jump[4];
4282 jump_list *end_list;
4283 #ifdef SUPPORT_UTF
4284 struct sljit_label *label;
4285 #ifdef SUPPORT_UCP
4286 pcre_uchar propdata[5];
4287 #endif
4288 #endif
4289
4290 switch(type)
4291 {
/* Start of subject: fail unless STR_PTR equals jit_arguments.begin. */
4292 case OP_SOD:
4293 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4294 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4295 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4296 return cc;
4297
/* Start of match: fail unless STR_PTR equals jit_arguments.str. */
4298 case OP_SOM:
4299 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4300 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4301 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4302 return cc;
4303
/* Fast-calls the shared wordboundary routine, then backtracks depending on
the zero / non-zero flag state it leaves behind. */
4304 case OP_NOT_WORD_BOUNDARY:
4305 case OP_WORD_BOUNDARY:
4306 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4307 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4308 return cc;
4309
4310 case OP_NOT_DIGIT:
4311 case OP_DIGIT:
4312 /* Digits are usually 0-9, so it is worth optimizing them. */
/* digits[0] == -2 marks the range table as not yet computed. */
4313 if (common->digits[0] == -2)
4314 get_ctype_ranges(common, ctype_digit, common->digits);
4315 detect_partial_match(common, backtracks);
4316 /* Flip the starting bit in the negative case. */
4317 if (type == OP_NOT_DIGIT)
4318 common->digits[1] ^= 1;
/* Fall back to the ctype table lookup when check_ranges() reports that it
did not emit a range check. */
4319 if (!check_ranges(common, common->digits, backtracks, TRUE))
4320 {
4321 read_char8_type(common);
4322 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4323 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4324 }
/* Undo the flip so the cached table is unchanged for later users. */
4325 if (type == OP_NOT_DIGIT)
4326 common->digits[1] ^= 1;
4327 return cc;
4328
/* Tests the ctype_space bit of the value produced by read_char8_type(). */
4329 case OP_NOT_WHITESPACE:
4330 case OP_WHITESPACE:
4331 detect_partial_match(common, backtracks);
4332 read_char8_type(common);
4333 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4334 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4335 return cc;
4336
/* Tests the ctype_word bit of the value produced by read_char8_type(). */
4337 case OP_NOT_WORDCHAR:
4338 case OP_WORDCHAR:
4339 detect_partial_match(common, backtracks);
4340 read_char8_type(common);
4341 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4342 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4343 return cc;
4344
/* Any character except a newline. */
4345 case OP_ANY:
4346 detect_partial_match(common, backtracks);
4347 read_char(common);
4348 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4349 {
/* Two-unit fixed newline: the character just read is compared with the
high byte of common->newline; only if it matches is the next code unit
compared with the low byte, and the item fails when both match. */
4350 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4351 end_list = NULL;
4352 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4353 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4354 else
4355 check_str_end(common, &end_list);
4356
4357 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4358 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4359 set_jumps(end_list, LABEL());
4360 JUMPHERE(jump[0]);
4361 }
4362 else
4363 check_newlinechar(common, common->nltype, backtracks, TRUE);
4364 return cc;
4365
/* Any character at all: only STR_PTR has to be advanced. */
4366 case OP_ALLANY:
4367 detect_partial_match(common, backtracks);
4368 #ifdef SUPPORT_UTF
4369 if (common->utf)
4370 {
4371 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4372 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4373 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4374 #if defined COMPILE_PCRE8
/* Lead bytes >= 0xc0: add the value found in utf8_table4[byte - 0xc0]. */
4375 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4376 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4377 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4378 #elif defined COMPILE_PCRE16
/* Units >= 0xd800: TMP1 becomes 1 when (unit & 0xfc00) == 0xd800 and is
shifted left by one before being added to STR_PTR. */
4379 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4380 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4381 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4382 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4383 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4384 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4385 #endif
4386 JUMPHERE(jump[0]);
4387 #endif /* COMPILE_PCRE[8|16] */
4388 return cc;
4389 }
4390 #endif
4391 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4392 return cc;
4393
/* Exactly one code unit, regardless of UTF mode. */
4394 case OP_ANYBYTE:
4395 detect_partial_match(common, backtracks);
4396 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4397 return cc;
4398
4399 #ifdef SUPPORT_UTF
4400 #ifdef SUPPORT_UCP
/* A single property test is wrapped into a temporary one-item extended class
terminated by XCL_END and handed to compile_xclass_matchingpath().
NOTE(review): propdata[0] is presumably the xclass flags byte - confirm
against the start of compile_xclass_matchingpath(). */
4401 case OP_NOTPROP:
4402 case OP_PROP:
4403 propdata[0] = 0;
4404 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4405 propdata[2] = cc[0];
4406 propdata[3] = cc[1];
4407 propdata[4] = XCL_END;
4408 compile_xclass_matchingpath(common, propdata, backtracks);
4409 return cc + 2;
4410 #endif
4411 #endif
4412
/* A CR that is immediately followed by NL is consumed as one unit. A CR at
the end of the subject or followed by something else is accepted on its
own. Any other character is checked with the bsr_nltype convention. */
4413 case OP_ANYNL:
4414 detect_partial_match(common, backtracks);
4415 read_char(common);
4416 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4417 /* We don't need to handle soft partial matching case. */
4418 end_list = NULL;
4419 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4420 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4421 else
4422 check_str_end(common, &end_list);
4423 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4424 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4425 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4426 jump[2] = JUMP(SLJIT_JUMP);
4427 JUMPHERE(jump[0]);
4428 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4429 set_jumps(end_list, LABEL());
4430 JUMPHERE(jump[1]);
4431 JUMPHERE(jump[2]);
4432 return cc;
4433
/* Reads a character and fast-calls the shared hspace routine; the resulting
flag state selects the backtrack. */
4434 case OP_NOT_HSPACE:
4435 case OP_HSPACE:
4436 detect_partial_match(common, backtracks);
4437 read_char(common);
4438 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4439 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4440 return cc;
4441
/* Same scheme as above, using the shared vspace routine. */
4442 case OP_NOT_VSPACE:
4443 case OP_VSPACE:
4444 detect_partial_match(common, backtracks);
4445 read_char(common);
4446 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4447 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4448 return cc;
4449
4450 #ifdef SUPPORT_UCP
/* Extended grapheme sequence. The gbprop value of the first character is
loaded, then further characters are consumed for as long as
ucp_gbtable[previous gbprop] has the bit (1 << current gbprop) set. */
4451 case OP_EXTUNI:
4452 detect_partial_match(common, backtracks);
4453 read_char(common);
4454 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4455 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4456 /* Optimize register allocation: use a real register. */
/* STACK_TOP is saved in LOCALS0 and borrowed to hold the previous gbprop. */
4457 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4458 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4459
4460 label = LABEL();
4461 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* TMP3 remembers the position before the candidate character so that
STR_PTR can be rewound when that character does not extend the sequence. */
4462 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4463 read_char(common);
4464 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4465 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4466 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4467
/* The table is indexed with 4-byte entries (index << 2, loaded as UI). */
4468 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4469 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4470 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4471 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4472 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4473 JUMPTO(SLJIT_C_NOT_ZERO, label);
4474
4475 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4476 JUMPHERE(jump[0]);
/* Restore the borrowed register. */
4477 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4478
4479 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4480 {
4481 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4482 /* Since we successfully read a char above, partial matching must occur. */
4483 check_partial(common, TRUE);
4484 JUMPHERE(jump[0]);
4485 }
4486 return cc;
4487 #endif
4488
/* End of subject, or just before a newline that ends the subject. */
4489 case OP_EODN:
4490 /* Requires rather complex checks. */
/* jump[0] is taken when STR_PTR is already at (or past) STR_END; it lands
directly on the final check_partial() call. */
4491 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4492 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4493 {
/* Two-unit fixed newline: exactly two code units must remain and they must
equal the high and low byte of common->newline. */
4494 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4495 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4496 if (common->mode == JIT_COMPILE)
4497 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4498 else
4499 {
/* Partial modes: when STR_PTR + 2 != STR_END, backtrack if more than two
units remain or the first unit is not the newline's high byte; otherwise
(one unit left and it is the high byte) report a partial match. */
4500 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4501 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4502 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4503 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4504 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4505 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4506 check_partial(common, TRUE);
4507 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4508 JUMPHERE(jump[1]);
4509 }
4510 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4511 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4512 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4513 }
4514 else if (common->nltype == NLTYPE_FIXED)
4515 {
/* One-unit fixed newline: exactly one unit must remain and equal it. */
4516 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4517 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4518 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4519 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4520 }
4521 else
4522 {
/* Other newline types. A leading CR is handled first: with more than two
units left the item fails, with exactly two the second must be NL, and
with one (jump[2]) the lone CR is accepted. */
4523 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4524 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4525 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4526 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4527 jump[2] = JUMP(SLJIT_C_GREATER);
4528 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4529 /* Equal. */
4530 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4531 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4532 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4533
4534 JUMPHERE(jump[1]);
4535 if (common->nltype == NLTYPE_ANYCRLF)
4536 {
/* First unit is not CR: it must be the last unit and equal to NL. */
4537 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4538 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4539 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4540 }
4541 else
4542 {
/* STR_PTR is parked in LOCALS1 while one character is read; that character
must end the subject and be accepted by the shared anynewline routine. */
4543 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4544 read_char(common);
4545 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4546 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4547 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4548 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4549 }
4550 JUMPHERE(jump[2]);
4551 JUMPHERE(jump[3]);
4552 }
4553 JUMPHERE(jump[0]);
4554 check_partial(common, FALSE);
4555 return cc;
4556
/* Strict end of subject. */
4557 case OP_EOD:
4558 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4559 check_partial(common, FALSE);
4560 return cc;
4561
/* Non-multiline circumflex: fail if STR_PTR is beyond jit_arguments.begin or
if the notbol argument is non-zero. */
4562 case OP_CIRC:
4563 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4564 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4565 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4566 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4567 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4568 return cc;
4569
/* Multiline circumflex. At the subject start only notbol is checked
(jump[0] then skips the rest). Elsewhere the position must not be the end
of the subject and must be preceded by a newline. */
4570 case OP_CIRCM:
4571 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4572 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4573 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4574 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4575 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4576 jump[0] = JUMP(SLJIT_JUMP);
4577 JUMPHERE(jump[1]);
4578
4579 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4580 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4581 {
/* Two-unit newline: both preceding units must lie inside the subject
(TMP1 still holds begin here) and match the newline's two bytes. */
4582 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4583 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4584 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4585 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4586 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4587 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4588 }
4589 else
4590 {
/* Step back one character, re-read it and test it as a newline. */
4591 skip_char_back(common);
4592 read_char(common);
4593 check_newlinechar(common, common->nltype, backtracks, FALSE);
4594 }
4595 JUMPHERE(jump[0]);
4596 return cc;
4597
/* Non-multiline dollar: fails when noteol is set; otherwise behaves like
OP_EODN, or like a strict end-of-subject test when common->endonly is set. */
4598 case OP_DOLL:
4599 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4600 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4601 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4602
4603 if (!common->endonly)
4604 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4605 else
4606 {
4607 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4608 check_partial(common, FALSE);
4609 }
4610 return cc;
4611
/* Multiline dollar. At the end of the subject only noteol is checked
(jump[0] then skips the rest); otherwise a newline must follow. */
4612 case OP_DOLLM:
4613 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4614 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4615 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4616 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4617 check_partial(common, FALSE);
4618 jump[0] = JUMP(SLJIT_JUMP);
4619 JUMPHERE(jump[1]);
4620
4621 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4622 {
4623 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4624 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4625 if (common->mode == JIT_COMPILE)
4626 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4627 else
4628 {
/* Partial modes: with a single unit left that equals the newline's high
byte, a partial match is reported before backtracking. */
4629 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4630 /* STR_PTR = STR_END - IN_UCHARS(1) */
4631 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4632 check_partial(common, TRUE);
4633 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4634 JUMPHERE(jump[1]);
4635 }
4636
4637 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4638 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4639 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4640 }
4641 else
4642 {
4643 peek_char(common);
4644 check_newlinechar(common, common->nltype, backtracks, FALSE);
4645 }
4646 JUMPHERE(jump[0]);
4647 return cc;
4648
/* A single literal character, optionally caseless. */
4649 case OP_CHAR:
4650 case OP_CHARI:
4651 length = 1;
4652 #ifdef SUPPORT_UTF
4653 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4654 #endif
/* Non-partial mode, and the character is either case sensitive, has no other
case, or char_get_othercase_bit() returns non-zero for it: advance STR_PTR
by the whole length, bounds-check once and let byte_sequence_compare()
emit the comparison. */
4655 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4656 {
4657 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4658 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4659
4660 context.length = IN_UCHARS(length);
4661 context.sourcereg = -1;
4662 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4663 context.ucharptr = 0;
4664 #endif
4665 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4666 }
/* General path: read one character and compare it against the pattern
character c (and its other case oc when needed). */
4667 detect_partial_match(common, backtracks);
4668 read_char(common);
4669 #ifdef SUPPORT_UTF
4670 if (common->utf)
4671 {
4672 GETCHAR(c, cc);
4673 }
4674 else
4675 #endif
4676 c = *cc;
4677 if (type == OP_CHAR || !char_has_othercase(common, cc))
4678 {
4679 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4680 return cc + length;
4681 }
4682 oc = char_othercase(common, c);
4683 bit = c ^ oc;
/* When the two cases differ in a single bit, OR that bit into the subject
character and compare once. */
4684 if (is_powerof2(bit))
4685 {
4686 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4687 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4688 return cc + length;
4689 }
/* Otherwise accumulate (ch == c) | (ch == oc) in TMP2 and fail on zero. */
4690 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4691 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4692 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4693 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4694 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4695 return cc + length;
4696
/* Any single character except the given one (optionally caseless). */
4697 case OP_NOT:
4698 case OP_NOTI:
4699 detect_partial_match(common, backtracks);
4700 length = 1;
4701 #ifdef SUPPORT_UTF
4702 if (common->utf)
4703 {
4704 #ifdef COMPILE_PCRE8
4705 c = *cc;
4706 if (c < 128)
4707 {
/* The pattern character is below 128, so only the first subject byte needs
comparing; afterwards the whole subject character is skipped using
utf8_table4 for lead bytes >= 0xc0. */
4708 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4709 if (type == OP_NOT || !char_has_othercase(common, cc))
4710 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4711 else
4712 {
4713 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4714 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4715 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4716 }
4717 /* Skip the variable-length character. */
4718 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4719 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4720 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4721 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4722 JUMPHERE(jump[0]);
4723 return cc + 1;
4724 }
4725 else
4726 #endif /* COMPILE_PCRE8 */
4727 {
4728 GETCHARLEN(c, cc, length);
4729 read_char(common);
4730 }
4731 }
4732 else
4733 #endif /* SUPPORT_UTF */
4734 {
4735 read_char(common);
4736 c = *cc;
4737 }
4738
/* Here the backtrack is taken on equality, the inverse of OP_CHAR. */
4739 if (type == OP_NOT || !char_has_othercase(common, cc))
4740 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4741 else
4742 {
4743 oc = char_othercase(common, c);
4744 bit = c ^ oc;
4745 if (is_powerof2(bit))
4746 {
4747 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4748 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4749 }
4750 else
4751 {
4752 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4753 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4754 }
4755 }
4756 return cc + length;
4757
/* Bitmap character class; the class data at cc occupies 32 bytes. */
4758 case OP_CLASS:
4759 case OP_NCLASS:
4760 detect_partial_match(common, backtracks);
4761 read_char(common);
/* check_class_ranges() returns non-zero when it has emitted the whole test. */
4762 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4763 return cc + 32 / sizeof(pcre_uchar);
4764
4765 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
/* Characters above 255 are outside the bitmap: for OP_CLASS they fail, for
OP_NCLASS jump[0] skips the bitmap test so that they are accepted. */
4766 jump[0] = NULL;
4767 #ifdef COMPILE_PCRE8
4768 /* This check only affects 8 bit mode. In other modes, we
4769 always need to compare the value with 255. */
4770 if (common->utf)
4771 #endif /* COMPILE_PCRE8 */
4772 {
4773 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4774 if (type == OP_CLASS)
4775 {
4776 add_jump(compiler, backtracks, jump[0]);
4777 jump[0] = NULL;
4778 }
4779 }
4780 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
/* Bitmap lookup: byte index is ch >> 3, bit mask is 1 << (ch & 7). */
4781 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4782 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4783 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4784 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4785 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4786 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4787 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4788 if (jump[0] != NULL)
4789 JUMPHERE(jump[0]);
4790 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4791 return cc + 32 / sizeof(pcre_uchar);
4792
4793 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Extended class: the class data starts after the LINK_SIZE length field,
whose value GET(cc, 0) is used to compute the returned pointer. */
4794 case OP_XCLASS:
4795 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4796 return cc + GET(cc, 0) - 1;
4797 #endif
4798
/* Moves STR_PTR backwards by `length` characters, failing if that would
cross jit_arguments.begin. */
4799 case OP_REVERSE:
4800 length = GET(cc, 0);
4801 if (length == 0)
4802 return cc + LINK_SIZE;
4803 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4804 #ifdef SUPPORT_UTF
4805 if (common->utf)
4806 {
/* UTF: step back one character at a time, TMP2 counting down from length. */
4807 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4808 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4809 label = LABEL();
4810 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4811 skip_char_back(common);
4812 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4813 JUMPTO(SLJIT_C_NOT_ZERO, label);
4814 }
4815 else
4816 #endif
4817 {
/* Fixed-width units: a single subtraction followed by one bounds check. */
4818 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4819 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4820 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4821 }
4822 check_start_used_ptr(common);
4823 return cc + LINK_SIZE;
4824 }
/* Only reached for an opcode that has no case above. */
4825 SLJIT_ASSERT_STOP();
4826 return cc;
4827 }
4828
4829 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4830 {
4831 /* This function consumes at least one input character. */
4832 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4833 DEFINE_COMPILER;
4834 pcre_uchar *ccbegin = cc;
4835 compare_context context;
4836 int size;
4837
4838 context.length = 0;
4839 do
4840 {
4841 if (cc >= ccend)
4842 break;
4843
4844 if (*cc == OP_CHAR)
4845 {
4846 size = 1;
4847 #ifdef SUPPORT_UTF
4848 if (common->utf && HAS_EXTRALEN(cc[1]))
4849 size += GET_EXTRALEN(cc[1]);
4850 #endif
4851 }
4852 else if (*cc == OP_CHARI)
4853 {
4854 size = 1;
4855 #ifdef SUPPORT_UTF
4856 if (common->utf)
4857 {
4858 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4859 size = 0;
4860 else if (HAS_EXTRALEN(cc[1]))
4861 size += GET_EXTRALEN(cc[1]);
4862 }
4863 else
4864 #endif
4865 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4866 size = 0;
4867 }
4868 else
4869 size = 0;
4870
4871 cc += 1 + size;
4872 context.length += IN_UCHARS(size);
4873 }
4874 while (size > 0 && context.length <= 128);
4875
4876 cc = ccbegin;
4877 if (context.length > 0)
4878 {
4879 /* We have a fixed-length byte sequence. */
4880 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4881 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4882
4883 context.sourcereg = -1;
4884 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4885 context.ucharptr = 0;
4886 #endif
4887 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4888 return cc;
4889 }
4890
4891 /* A non-fixed length character will be checked if length == 0. */
4892 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
4893 }
4894
4895 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4896 {
4897 DEFINE_COMPILER;
4898 int offset = GET2(cc, 1) << 1;
4899
4900 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4901 if (!common->jscript_compat)
4902 {
4903 if (backtracks == NULL)
4904 {
4905 /* OVECTOR(1) contains the "string begin - 1" constant. */
4906 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4907 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4908 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4909 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4910 return JUMP(SLJIT_C_NOT_ZERO);
4911 }
4912 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4913 }
4914 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4915 }
4916
4917 /* Forward declarations. */
4918 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
4919 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
4920
/* Allocates a backtrack record of `size` bytes through sljit_alloc_memory(),
zero-fills it, links it on top of the parent's chain (prev = parent->top,
parent->top = backtrack) and stores `ccstart` as its pattern position.

The expansion site must provide the variables `compiler`, `backtrack` and
`parent`. If the compiler reports an error after the allocation, the
enclosing function returns `error` immediately, before the record is
touched. */
4921 #define PUSH_BACKTRACK(size, ccstart, error) \
4922 do \
4923 { \
4924 backtrack = sljit_alloc_memory(compiler, (size)); \
4925 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
4926 return error; \
4927 memset(backtrack, 0, size); \
4928 backtrack->prev = parent->top; \
4929 backtrack->cc = (ccstart); \
4930 parent->top = backtrack; \
4931 } \
4932 while (0)
4933
/* Variant of the backtrack-record allocation for functions returning void:
allocates `size` bytes through sljit_alloc_memory(), zero-fills the record,
links it on top of the parent's chain and stores `ccstart` in it.

The expansion site must provide the variables `compiler`, `backtrack` and
`parent`. If the compiler reports an error after the allocation, the
enclosing function returns immediately with no value. */
4934 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
4935 do \
4936 { \
4937 backtrack = sljit_alloc_memory(compiler, (size)); \
4938 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
4939 return; \
4940 memset(backtrack, 0, size); \
4941 backtrack->prev = parent->top; \
4942 backtrack->cc = (ccstart); \
4943 parent->top = backtrack; \
4944 } \
4945 while (0)
4946
/* Casts the `backtrack` variable that is in scope at the point of use to a
pointer to the given backtrack structure type. */
4947 #define BACKTRACK_AS(type) ((type *)backtrack)
4948
/* Emits the code that matches a back reference (OP_REF, or the caseless
OP_REFI) against the subject at STR_PTR. Mismatches jump to `backtracks`.

withchecks: when TRUE, the group start is compared with OVECTOR(1) (skipped in
  jscript_compat mode) and an "empty reference" jump (start == end) is created.
emptyfail: decides what that empty-reference jump does: TRUE adds it to
  `backtracks`, FALSE makes it land right after the emitted comparison code.

Returns cc + 1 + IMM2_SIZE. */
4949 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
4950 {
4951 DEFINE_COMPILER;
4952 int offset = GET2(cc, 1) << 1;
4953 struct sljit_jump *jump = NULL;
4954 struct sljit_jump *partial;
4955 struct sljit_jump *nopartial;
4956
/* TMP1 = start of the referenced group. */
4957 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4958 /* OVECTOR(1) contains the "string begin - 1" constant. */
4959 if (withchecks && !common->jscript_compat)
4960 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4961
4962 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4963 if (common->utf && *cc == OP_REFI)
4964 {
/* Caseless UTF comparison is delegated to the do_utf_caselesscmp helper,
called with three arguments. The assertion pins the register assignment
this call sequence depends on. */
4965 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
4966 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4967 if (withchecks)
4968 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
4969
4970 /* Needed to save important temporary registers. */
4971 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4972 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
/* The current subject position is passed through jit_arguments.uchar_ptr. */
4973 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
4974 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
4975 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return values 0 and 1 are failure codes; any other value becomes the new
STR_PTR. In non-JIT_COMPILE modes the value 1 additionally goes through
check_partial before backtracking. */
4976 if (common->mode == JIT_COMPILE)
4977 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
4978 else
4979 {
4980 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
4981 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
4982 check_partial(common, FALSE);
4983 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4984 JUMPHERE(nopartial);
4985 }
4986 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
4987 }
4988 else
4989 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4990 {
/* TMP2 = end - start of the referenced group; a zero result is the empty
reference case. */
4991 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
4992 if (withchecks)
4993 jump = JUMP(SLJIT_C_ZERO);
4994
/* Advance STR_PTR by that length; `partial` is taken when the result is
beyond STR_END. */
4995 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4996 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
4997 if (common->mode == JIT_COMPILE)
4998 add_jump(compiler, backtracks, partial);
4999
/* Fast call into the shared caseful / caseless compare routine; a non-zero
TMP2 afterwards is treated as a mismatch. */
5000 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5001 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5002
5003 if (common->mode != JIT_COMPILE)
5004 {
/* Partial matching modes: when the reference would run past STR_END, only
the part that fits is compared, then check_partial is invoked and the
match backtracks. */
5005 nopartial = JUMP(SLJIT_JUMP);
5006 JUMPHERE(partial);
5007 /* TMP2 -= STR_END - STR_PTR */
5008 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5009 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5010 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5011 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5012 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5013 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5014 JUMPHERE(partial);
5015 check_partial(common, FALSE);
5016 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5017 JUMPHERE(nopartial);
5018 }
5019 }
5020
/* Resolve the empty-reference jump (only created when withchecks is TRUE). */
5021 if (jump != NULL)
5022 {
5023 if (emptyfail)
5024 add_jump(compiler, backtracks, jump);
5025 else
5026 JUMPHERE(jump);
5027 }
5028 return cc + 1 + IMM2_SIZE;
5029 }
5030
/* Emits the matching path of a back reference followed by a repeat operator.
The repeat opcode is read from cc[1 + IMM2_SIZE]; its lowest bit selects the
minimizing variant. min / max are decoded from the repeat opcode, and max == 0
means that no upper limit check is emitted.

An iterator_backtrack is pushed on `parent`. Returns the address following the
repeat operands, or NULL if pushing the backtrack fails. */
5031 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5032 {
5033 DEFINE_COMPILER;
5034 backtrack_common *backtrack;
5035 pcre_uchar type;
5036 struct sljit_label *label;
5037 struct sljit_jump *zerolength;
5038 struct sljit_jump *jump = NULL;
5039 pcre_uchar *ccbegin = cc;
5040 int min = 0, max = 0;
5041 BOOL minimize;
5042
5043 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5044
5045 type = cc[1 + IMM2_SIZE];
5046 minimize = (type & 0x1) != 0;
/* Decode the repeat limits and advance cc past the repeat item. */
5047 switch(type)
5048 {
5049 case OP_CRSTAR:
5050 case OP_CRMINSTAR:
5051 min = 0;
5052 max = 0;
5053 cc += 1 + IMM2_SIZE + 1;
5054 break;
5055 case OP_CRPLUS:
5056 case OP_CRMINPLUS:
5057 min = 1;
5058 max = 0;
5059 cc += 1 + IMM2_SIZE + 1;
5060 break;
5061 case OP_CRQUERY:
5062 case OP_CRMINQUERY:
5063 min = 0;
5064 max = 1;
5065 cc += 1 + IMM2_SIZE + 1;
5066 break;
5067 case OP_CRRANGE:
5068 case OP_CRMINRANGE:
5069 min = GET2(cc, 1 + IMM2_SIZE + 1);
5070 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5071 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5072 break;
5073 default:
5074 SLJIT_ASSERT_STOP();
5075 break;
5076 }
5077
/* Non-minimizing variant: repeat the reference in a loop, pushing STR_PTR
after each successful iteration. */
5078 if (!minimize)
5079 {
5080 if (min == 0)
5081 {
5082 allocate_stack(common, 2);
5083 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5084 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5085 /* Temporary release of STR_PTR. */
5086 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5087 zerolength = compile_ref_checks(common, ccbegin, NULL);
5088 /* Restore if not zero length. */
5089 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5090 }
5091 else
5092 {
5093 allocate_stack(common, 1);
5094 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5095 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5096 }
5097
/* The iteration counter is kept in the POSSESSIVE0 local and is only
needed when a limit greater than one has to be enforced. */
5098 if (min > 1 || max > 1)
5099 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5100
5101 label = LABEL();
5102 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5103
5104 if (min > 1 || max > 1)
5105 {
5106 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5107 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5108 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5109 if (min > 1)
5110 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5111 if (max > 1)
5112 {
5113 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5114 allocate_stack(common, 1);
5115 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5116 JUMPTO(SLJIT_JUMP, label);
5117 JUMPHERE(jump);
5118 }
5119 }
5120
5121 if (max == 0)
5122 {
5123 /* Includes min > 1 case as well. */
5124 allocate_stack(common, 1);
5125 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5126 JUMPTO(SLJIT_JUMP, label);
5127 }
5128
5129 JUMPHERE(zerolength);
5130 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5131
5132 decrease_call_count(common);
5133 return cc;
5134 }
5135
/* Minimizing variant: STACK(0) holds the saved STR_PTR and, except for
OP_CRMINSTAR, STACK(1) holds the iteration counter. */
5136 allocate_stack(common, 2);
5137 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5138 if (type != OP_CRMINSTAR)
5139 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5140
5141 if (min == 0)
5142 {
5143 zerolength = compile_ref_checks(common, ccbegin, NULL);
5144 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
/* With min == 0 the first attempt skips the reference entirely. */
5145 jump = JUMP(SLJIT_JUMP);
5146 }
5147 else
5148 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5149
5150 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5151 if (max > 0)
5152 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5153
5154 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5155 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5156
5157 if (min > 1)
5158 {
5159 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5160 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5161 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5162 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5163 }
5164 else if (max > 0)
5165 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5166
5167 if (jump != NULL)
5168 JUMPHERE(jump);
5169 JUMPHERE(zerolength);
5170
5171 decrease_call_count(common);
5172 return cc;
5173 }
5174
5175 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5176 {
5177 DEFINE_COMPILER;
5178 backtrack_common *backtrack;
5179 recurse_entry *entry = common->entries;
5180 recurse_entry *prev = NULL;
5181 int start = GET(cc, 1);
5182 pcre_uchar *start_cc;
5183
5184 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5185
5186 /* Inlining simple patterns. */
5187 if (get_framesize(common, common->start + start, TRUE) == no_stack)
5188 {
5189 start_cc = common->start + start;
5190 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5191 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5192 return cc + 1 + LINK_SIZE;
5193 }
5194
5195 while (entry != NULL)
5196 {
5197 if (entry->start == start)
5198 break;
5199 prev = entry;
5200 entry = entry->next;
5201 }
5202
5203 if (entry == NULL)
5204 {
5205 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5206 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5207 return NULL;
5208 entry->next = NULL;
5209 entry->entry = NULL;
5210 entry->calls = NULL;
5211 entry->start = start;
5212
5213 if (prev != NULL)
5214 prev->next = entry;
5215 else
5216 common->entries = entry;
5217 }
5218
5219 if (common->has_set_som && common->mark_ptr != 0)
5220 {
5221 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5222 allocate_stack(common, 2);
5223 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5224 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5225 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5226 }
5227 else if (common->has_set_som || common->mark_ptr != 0)
5228 {
5229 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5230 allocate_stack(common, 1);
5231 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5232 }
5233
5234 if (entry->entry == NULL)
5235 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5236 else
5237 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5238 /* Leave if the match is failed. */
5239 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5240 return cc + 1 + LINK_SIZE;
5241 }
5242
5243 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5244 {
5245 const pcre_uchar *begin = arguments->begin;
5246 int *offset_vector = arguments->offsets;
5247 int offset_count = arguments->offset_count;
5248 int i;
5249
5250 if (PUBL(callout) == NULL)
5251 return 0;
5252
5253 callout_block->version = 2;
5254 callout_block->callout_data = arguments->callout_data;
5255
5256 /* Offsets in subject. */
5257 callout_block->subject_length = arguments->end - arguments->begin;
5258 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5259 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5260 #if defined COMPILE_PCRE8
5261 callout_block->subject = (PCRE_SPTR)begin;
5262 #elif defined COMPILE_PCRE16
5263 callout_block->subject = (PCRE_SPTR16)begin;
5264 #elif defined COMPILE_PCRE32
5265 callout_block->subject = (PCRE_SPTR32)begin;
5266 #endif
5267
5268 /* Convert and copy the JIT offset vector to the offset_vector array. */
5269 callout_block->capture_top = 0;
5270 callout_block->offset_vector = offset_vector;
5271 for (i = 2; i < offset_count; i += 2)
5272 {
5273 offset_vector[i] = jit_ovector[i] - begin;
5274 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5275 if (jit_ovector[i] >= begin)
5276 callout_block->capture_top = i;
5277 }
5278
5279 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5280 if (offset_count > 0)
5281 offset_vector[0] = -1;
5282 if (offset_count > 1)
5283 offset_vector[1] = -1;
5284 return (*PUBL(callout))(callout_block);
5285 }
5286
5287 /* Size of the callout block, rounded up to a multiple of 8 bytes. */
5288 #define CALLOUT_ARG_SIZE \
5289 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5290
/* Offset of the callout block field `arg`, shifted down by CALLOUT_ARG_SIZE
(the result is the field offset minus the rounded block size). */
5291 #define CALLOUT_ARG_OFFSET(arg) \
5292 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5293
/* Emits the matching path of a callout item. Stack space of CALLOUT_ARG_SIZE
bytes is allocated for a callout block, its fields are filled in, and
do_callout is called with three arguments. The return value is then tested:
a positive value jumps to the backtrack list, a negative value jumps to the
forced quit target, and zero continues matching.

Returns cc + 2 + 2 * LINK_SIZE, or NULL if pushing the backtrack fails. */
5294 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5295 {
5296 DEFINE_COMPILER;
5297 backtrack_common *backtrack;
5298
5299 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5300
5301 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5302
5303 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5304 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5305 SLJIT_ASSERT(common->capture_last_ptr != 0);
/* cc[1] is stored as the callout number. */
5306 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5307 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5308
5309 /* These pointer sized fields temporarily store internal variables:
the offset_vector field receives STR_PTR and the subject field receives
OVECTOR(0); do_callout converts them before overwriting the fields. */
5310 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5311 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5312 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5313
/* The mark field gets jit_arguments.mark_ptr when marks are in use
(common->mark_ptr != 0), otherwise the immediate 0. */
5314 if (common->mark_ptr != 0)
5315 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5316 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5317 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5318 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5319
5320 /* Needed to save important temporary registers. */
5321 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
/* Second argument: address of the callout block; third argument: address
of the ovector area inside the locals. */
5322 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5323 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5324 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
/* NOTE(review): presumably normalizes the int-sized return value to a full
register width before the signed comparison below - confirm against sljit. */
5325 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5326 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5327 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5328
5329 /* Check return value. */
5330 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5331 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
5332 if (common->forced_quit_label == NULL)
5333 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5334 else
5335 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5336 return cc + 2 + 2 * LINK_SIZE;
5337 }
5338
5339 #undef CALLOUT_ARG_SIZE
5340 #undef CALLOUT_ARG_OFFSET
5341
/* Emits the matching path of an assertion (opcode in the range OP_ASSERT ..
OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO or OP_BRAMINZERO.

conditional: when TRUE, failure jumps are collected in backtrack->condfailed
  instead of backtrack->common.topbacktracks (the assertion is then not
  allowed to carry a BRAZERO / BRAMINZERO prefix).

Every alternative is compiled with its own temporary backtrack_common, and its
backtracking path is generated immediately after it. The quit / accept labels
and jump lists of `common` are replaced while the assertion is compiled and
restored before every return.

Returns cc + 1 + LINK_SIZE, where cc is the item that ends the last
alternative, or NULL when the compiler reports an error. */
5342 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5343 {
5344 DEFINE_COMPILER;
5345 int framesize;
5346 int private_data_ptr;
5347 backtrack_common altbacktrack;
5348 pcre_uchar *ccbegin;
5349 pcre_uchar opcode;
5350 pcre_uchar bra = OP_BRA;
5351 jump_list *tmp = NULL;
5352 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5353 jump_list **found;
5354 /* Saving previous accept variables. */
5355 struct sljit_label *save_quit_label = common->quit_label;
5356 struct sljit_label *save_accept_label = common->accept_label;
5357 jump_list *save_quit = common->quit;
5358 jump_list *save_accept = common->accept;
5359 struct sljit_jump *jump;
5360 struct sljit_jump *brajump = NULL;
5361
5362 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5363 {
5364 SLJIT_ASSERT(!conditional);
5365 bra = *cc;
5366 cc++;
5367 }
5368 private_data_ptr = PRIVATE_DATA(cc);
5369 SLJIT_ASSERT(private_data_ptr != 0);
5370 framesize = get_framesize(common, cc, FALSE);
5371 backtrack->framesize = framesize;
5372 backtrack->private_data_ptr = private_data_ptr;
5373 opcode = *cc;
5374 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* `found` receives the jump taken when an alternative matches: for positive
assertions that is the local success list, for negative ones the failure
target. */
5375 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
5376 ccbegin = cc;
5377 cc += GET(cc, 1);
5378
5379 if (bra == OP_BRAMINZERO)
5380 {
5381 /* This is a braminzero backtrack path. */
5382 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5383 free_stack(common, 1);
5384 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5385 }
5386
/* Save the state needed to undo the assertion: without a frame only
STACK_TOP (in the private data slot) and STR_PTR; with a frame also the
previous private data value and the frame itself. */
5387 if (framesize < 0)
5388 {
5389 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5390 allocate_stack(common, 1);
5391 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5392 }
5393 else
5394 {
5395 allocate_stack(common, framesize + 2);
5396 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5397 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
5398 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5399 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5400 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5401 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
5402 }
5403
5404 memset(&altbacktrack, 0, sizeof(backtrack_common));
5405 common->quit_label = NULL;
5406 common->quit = NULL;
/* One iteration per alternative. */
5407 while (1)
5408 {
5409 common->accept_label = NULL;
5410 common->accept = NULL;
5411 altbacktrack.top = NULL;
5412 altbacktrack.topbacktracks = NULL;
5413
/* Later alternatives restart from the saved subject position. */
5414 if (*ccbegin == OP_ALT)
5415 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5416
5417 altbacktrack.cc = ccbegin;
5418 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5419 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5420 {
5421 common->quit_label = save_quit_label;
5422 common->accept_label = save_accept_label;
5423 common->quit = save_quit;
5424 common->accept = save_accept;
5425 return NULL;
5426 }
5427 common->accept_label = LABEL();
5428 if (common->accept != NULL)
5429 set_jumps(common->accept, common->accept_label);
5430
5431 /* Reset stack. */
5432 if (framesize < 0)
5433 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5434 else {
5435 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5436 {
5437 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5438 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5439 }
5440 else
5441 {
5442 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5443 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5444 }
5445 }
5446
5447 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5448 {
5449 /* We know that STR_PTR was stored on the top of the stack. */
5450 if (conditional)
5451 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5452 else if (bra == OP_BRAZERO)
5453 {
5454 if (framesize < 0)
5455 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5456 else
5457 {
5458 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5459 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5460 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5461 }
5462 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5463 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5464 }
5465 else if (framesize >= 0)
5466 {
5467 /* For OP_BRA and OP_BRAMINZERO. */
5468 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5469 }
5470 }
/* The alternative matched: leave through the `found` list. */
5471 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5472
5473 compile_backtrackingpath(common, altbacktrack.top);
5474 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5475 {
5476 common->quit_label = save_quit_label;
5477 common->accept_label = save_accept_label;
5478 common->quit = save_quit;
5479 common->accept = save_accept;
5480 return NULL;
5481 }
5482 set_jumps(altbacktrack.topbacktracks, LABEL());
5483
5484 if (*cc != OP_ALT)
5485 break;
5486
5487 ccbegin = cc;
5488 cc += GET(cc, 1);
5489 }
5490 /* None of them matched. */
5491 if (common->quit != NULL)
5492 set_jumps(common->quit, LABEL());
5493
5494 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5495 {
5496 /* Assert is failed. */
5497 if (conditional || bra == OP_BRAZERO)
5498 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5499
5500 if (framesize < 0)
5501 {
5502 /* The topmost item should be 0. */
5503 if (bra == OP_BRAZERO)
5504 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5505 else
5506 free_stack(common, 1);
5507 }
5508 else
5509 {
5510 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5511 /* The topmost item should be 0. */
5512 if (bra == OP_BRAZERO)
5513 {
5514 free_stack(common, framesize + 1);
5515 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5516 }
5517 else
5518 free_stack(common, framesize + 2);
5519 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5520 }
/* With OP_BRAZERO the failure jump is bound to the matching path label
below; otherwise it goes to the failure target list. */
5521 jump = JUMP(SLJIT_JUMP);
5522 if (bra != OP_BRAZERO)
5523 add_jump(compiler, target, jump);
5524
5525 /* Assert is successful. */
5526 set_jumps(tmp, LABEL());
5527 if (framesize < 0)
5528 {
5529 /* We know that STR_PTR was stored on the top of the stack. */
5530 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5531 /* Keep the STR_PTR on the top of the stack. */
5532 if (bra == OP_BRAZERO)
5533 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5534 else if (bra == OP_BRAMINZERO)
5535 {
5536 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5537 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5538 }
5539 }
5540 else
5541 {
5542 if (bra == OP_BRA)
5543 {
5544 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5545 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5546 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5547 }
5548 else
5549 {
5550 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5551 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5552 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5553 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5554 }
5555 }
5556
5557 if (bra == OP_BRAZERO)
5558 {
5559 backtrack->matchingpath = LABEL();
5560 SET_LABEL(jump, backtrack->matchingpath);
5561 }
5562 else if (bra == OP_BRAMINZERO)
5563 {
5564 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5565 JUMPHERE(brajump);
5566 if (framesize >= 0)
5567 {
5568 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5569 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5570 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5571 }
5572 set_jumps(backtrack->common.topbacktracks, LABEL());
5573 }
5574 }
5575 else
5576 {
5577 /* AssertNot is successful. */
5578 if (framesize < 0)
5579 {
5580 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5581 if (bra != OP_BRA)
5582 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5583 else
5584 free_stack(common, 1);
5585 }
5586 else
5587 {
5588 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5589 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5590 /* The topmost item should be 0. */
5591 if (bra != OP_BRA)
5592 {
5593 free_stack(common, framesize + 1);
5594 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5595 }
5596 else
5597 free_stack(common, framesize + 2);
5598 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5599 }
5600
5601 if (bra == OP_BRAZERO)
5602 backtrack->matchingpath = LABEL();
5603 else if (bra == OP_BRAMINZERO)
5604 {
5605 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5606 JUMPHERE(brajump);
5607 }
5608
/* With a BRAZERO / BRAMINZERO prefix the collected failure jumps are bound
here and the list is emptied. */
5609 if (bra != OP_BRA)
5610 {
5611 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5612 set_jumps(backtrack->common.topbacktracks, LABEL());
5613 backtrack->common.topbacktracks = NULL;
5614 }
5615 }
5616
/* Restore the quit / accept state saved on entry. */
5617 common->quit_label = save_quit_label;
5618 common->accept_label = save_accept_label;
5619 common->quit = save_quit;
5620 common->accept = save_accept;
5621 return cc + 1 + LINK_SIZE;
5622 }
5623
5624 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
5625 {
5626 int condition = FALSE;
5627 pcre_uchar *slotA = name_table;
5628 pcre_uchar *slotB;
5629 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5630 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5631 sljit_sw no_capture;
5632 int i;
5633
5634 locals += refno & 0xff;
5635 refno >>= 8;
5636 no_capture = locals[1];
5637
5638 for (i = 0; i < name_count; i++)
5639 {
5640 if (GET2(slotA, 0) == refno) break;
5641 slotA += name_entry_size;
5642 }
5643
5644 if (i < name_count)
5645 {
5646 /* Found a name for the number - there can be only one; duplicate names
5647 for different numbers are allowed, but not vice versa. First scan down
5648 for duplicates. */
5649
5650 slotB = slotA;
5651 while (slotB > name_table)
5652 {
5653 slotB -= name_entry_size;
5654 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5655 {
5656 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5657 if (condition) break;
5658 }
5659 else break;
5660 }
5661
5662 /* Scan up for duplicates */
5663 if (!condition)
5664 {
5665 slotB = slotA;
5666 for (i++; i < name_count; i++)
5667 {
5668 slotB += name_entry_size;
5669 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5670 {
5671 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5672 if (condition) break;
5673 }
5674 else break;
5675 }
5676 }
5677 }
5678 return condition;
5679 }
5680
5681 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5682 {
5683 int condition = FALSE;
5684 pcre_uchar *slotA = name_table;
5685 pcre_uchar *slotB;
5686 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5687 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5688 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
5689 sljit_uw i;
5690
5691 for (i = 0; i < name_count; i++)
5692 {
5693 if (GET2(slotA, 0) == recno) break;
5694 slotA += name_entry_size;
5695 }
5696
5697 if (i < name_count)
5698 {
5699 /* Found a name for the number - there can be only one; duplicate
5700 names for different numbers are allowed, but not vice versa. First
5701 scan down for duplicates. */
5702
5703 slotB = slotA;
5704 while (slotB > name_table)
5705 {
5706 slotB -= name_entry_size;
5707 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5708 {
5709 condition = GET2(slotB, 0) == group_num;
5710 if (condition) break;
5711 }
5712 else break;
5713 }
5714
5715 /* Scan up for duplicates */
5716 if (!condition)
5717 {
5718 slotB = slotA;
5719 for (i++; i < name_count; i++)
5720 {
5721 slotB += name_entry_size;
5722 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5723 {
5724 condition = GET2(slotB, 0) == group_num;
5725 if (condition) break;
5726 }
5727 else break;
5728 }
5729 }
5730 }
5731 return condition;
5732 }
5733
5734 /*
5735 Handling bracketed expressions is probably the most complex part.
5736
5737 Stack layout naming characters:
5738 S - Push the current STR_PTR
5739 0 - Push a 0 (NULL)
5740 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5741 before the next alternative. Not pushed if there are no alternatives.
5742 M - Any values pushed by the current alternative. Can be empty, or anything.
5743 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5744 L - Push the previous local (pointed by localptr) to the stack
5745 () - optional values stored on the stack
5746 ()* - optional, can be stored multiple times
5747
5748 The following list shows the regular expression templates, their PCRE byte codes
5749 and stack layout supported by pcre-sljit.
5750
5751 (?:) OP_BRA | OP_KET A M
5752 () OP_CBRA | OP_KET C M
5753 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5754 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5755 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5756 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5757 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5758 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5759 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5760 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5761 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5762 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5763 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5764 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5765 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5766 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5767 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5768 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5769 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5770 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
5771 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
5772 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
5773
5774
5775 Stack layout naming characters:
5776 A - Push the alternative index (starting from 0) on the stack.
5777 Not pushed if there are no alternatives.
5778 M - Any values pushed by the current alternative. Can be empty, or anything.
5779
5780 The next list shows the possible content of a bracket:
5781 (|) OP_*BRA | OP_ALT ... M A
5782 (?()|) OP_*COND | OP_ALT M A
5783 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
5784 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
5785 Or nothing, if trace is unnecessary
5786 */
5787
5788 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5789 {
5790 DEFINE_COMPILER;
5791 backtrack_common *backtrack;
5792 pcre_uchar opcode;
5793 int private_data_ptr = 0;
5794 int offset = 0;
5795 int stacksize;
5796 pcre_uchar *ccbegin;
5797 pcre_uchar *matchingpath;
5798 pcre_uchar bra = OP_BRA;
5799 pcre_uchar ket;
5800 assert_backtrack *assert;
5801 BOOL has_alternatives;
5802 struct sljit_jump *jump;
5803 struct sljit_jump *skip;
5804 struct sljit_label *rmaxlabel = NULL;
5805 struct sljit_jump *braminzerojump = NULL;
5806
5807 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5808
5809 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5810 {
5811 bra = *cc;
5812 cc++;
5813 opcode = *cc;
5814 }
5815
5816 opcode = *cc;
5817 ccbegin = cc;
5818 matchingpath = ccbegin + 1 + LINK_SIZE;
5819
5820 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
5821 {
5822 /* Drop this bracket_backtrack. */
5823 parent->top = backtrack->prev;
5824 return bracketend(cc);
5825 }
5826
5827 ket = *(bracketend(cc) - 1 - LINK_SIZE);
5828 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
5829 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
5830 cc += GET(cc, 1);
5831
5832 has_alternatives = *cc == OP_ALT;
5833 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5834 {
5835 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
5836 if (*matchingpath == OP_NRREF)
5837 {
5838 stacksize = GET2(matchingpath, 1);
5839 if (common->currententry == NULL || stacksize == RREF_ANY)
5840 has_alternatives = FALSE;
5841 else if (common->currententry->start == 0)
5842 has_alternatives = stacksize != 0;
5843 else
5844 has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5845 }
5846 }
5847
5848 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5849 opcode = OP_SCOND;
5850 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5851 opcode = OP_ONCE;
5852
5853 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5854 {
5855 /* Capturing brackets has a pre-allocated space. */
5856 offset = GET2(ccbegin, 1 + LINK_SIZE);
5857 if (common->optimized_cbracket[offset] == 0)
5858 {
5859 private_data_ptr = OVECTOR_PRIV(offset);
5860 offset <<= 1;
5861 }
5862 else
5863 {
5864 offset <<= 1;
5865 private_data_ptr = OVECTOR(offset);
5866 }
5867 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5868 matchingpath += IMM2_SIZE;
5869 }
5870 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
5871 {
5872 /* Other brackets simply allocate the next entry. */
5873 private_data_ptr = PRIVATE_DATA(ccbegin);
5874 SLJIT_ASSERT(private_data_ptr != 0);
5875 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5876 if (opcode == OP_ONCE)
5877 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
5878 }
5879
5880 /* Instructions before the first alternative. */
5881 stacksize = 0;
5882 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5883 stacksize++;
5884 if (bra == OP_BRAZERO)
5885 stacksize++;
5886
5887 if (stacksize > 0)
5888 allocate_stack(common, stacksize);
5889
5890 stacksize = 0;
5891 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5892 {
5893 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5894 stacksize++;
5895 }
5896
5897 if (bra == OP_BRAZERO)
5898 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5899
5900 if (bra == OP_BRAMINZERO)
5901 {
5902 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
5903 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5904 if (ket != OP_KETRMIN)
5905 {
5906 free_stack(common, 1);
5907 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5908 }
5909 else
5910 {
5911 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5912 {
5913 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5914 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5915 /* Nothing stored during the first run. */
5916 skip = JUMP(SLJIT_JUMP);
5917 JUMPHERE(jump);
5918 /* Checking zero-length iteration. */
5919 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5920 {
5921 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
5922 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5923 }
5924 else
5925 {
5926 /* Except when the whole stack frame must be saved. */
5927 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5928 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
5929 }
5930 JUMPHERE(skip);
5931 }
5932 else
5933 {
5934 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5935 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5936 JUMPHERE(jump);
5937 }
5938 }
5939 }
5940
5941 if (ket == OP_KETRMIN)
5942 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
5943
5944 if (ket == OP_KETRMAX)
5945 {
5946 rmaxlabel = LABEL();
5947 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
5948 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
5949 }
5950
5951 /* Handling capturing brackets and alternatives. */
5952 if (opcode == OP_ONCE)
5953 {
5954 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5955 {
5956 /* Neither capturing brackets nor recursions are not found in the block. */
5957 if (ket == OP_KETRMIN)
5958 {
5959 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5960 allocate_stack(common, 2);
5961 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5962 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5963 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5964 }
5965 else if (ket == OP_KETRMAX || has_alternatives)
5966 {
5967 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5968 allocate_stack(common, 1);
5969 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5970 }
5971 else
5972 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5973 }
5974 else
5975 {
5976 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
5977 {
5978 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
5979 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5980 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
5981 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5982 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5983 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5984 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
5985 }
5986 else
5987 {
5988 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
5989 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5990 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
5991 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5992 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5993 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
5994 }
5995 }
5996 }
5997 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
5998 {
5999 /* Saving the previous values. */
6000 if (common->optimized_cbracket[offset >> 1] != 0)
6001 {
6002 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
6003 allocate_stack(common, 2);
6004 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6005 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
6006 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6007 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6008 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6009 }
6010 else
6011 {
6012 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6013 allocate_stack(common, 1);
6014 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6015 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6016 }
6017 }
6018 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6019 {
6020 /* Saving the previous value. */
6021 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6022 allocate_stack(common, 1);
6023 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6024 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6025 }
6026 else if (has_alternatives)
6027 {
6028 /* Pushing the starting string pointer. */
6029 allocate_stack(common, 1);
6030 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6031 }
6032
6033 /* Generating code for the first alternative. */
6034 if (opcode == OP_COND || opcode == OP_SCOND)
6035 {
6036 if (*matchingpath == OP_CREF)
6037 {
6038 SLJIT_ASSERT(has_alternatives);
6039 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
6040 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6041 matchingpath += 1 + IMM2_SIZE;
6042 }
6043 else if (*matchingpath == OP_NCREF)
6044 {
6045 SLJIT_ASSERT(has_alternatives);
6046 stacksize = GET2(matchingpath, 1);
6047 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6048
6049 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6050 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6051 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6052 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
6053 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6054 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6055 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
6056 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6057 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6058
6059 JUMPHERE(jump);
6060 matchingpath += 1 + IMM2_SIZE;
6061 }
6062 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
6063 {
6064 /* Never has other case. */
6065 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
6066
6067 stacksize = GET2(matchingpath, 1);
6068 if (common->currententry == NULL)
6069 stacksize = 0;
6070 else if (stacksize == RREF_ANY)
6071 stacksize = 1;
6072 else if (common->currententry->start == 0)
6073 stacksize = stacksize == 0;
6074 else
6075 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6076
6077 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
6078 {
6079 SLJIT_ASSERT(!has_alternatives);
6080 if (stacksize != 0)
6081 matchingpath += 1 + IMM2_SIZE;
6082 else
6083 {
6084 if (*cc == OP_ALT)
6085 {
6086 matchingpath = cc + 1 + LINK_SIZE;
6087 cc += GET(cc, 1);
6088 }
6089 else
6090 matchingpath = cc;
6091 }
6092 }
6093 else
6094 {
6095 SLJIT_ASSERT(has_alternatives);
6096
6097 stacksize = GET2(matchingpath, 1);
6098 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6099 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6100 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6101 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
6102 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
6103 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6104 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6105 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
6106 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6107 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6108 matchingpath += 1 + IMM2_SIZE;
6109 }
6110 }
6111 else
6112 {
6113 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
6114 /* Similar code as PUSH_BACKTRACK macro. */
6115 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
6116 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6117 return NULL;
6118 memset(assert, 0, sizeof(assert_backtrack));
6119 assert->common.cc = matchingpath;
6120 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
6121 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
6122 }
6123 }
6124
6125 compile_matchingpath(common, matchingpath, cc, backtrack);
6126 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6127 return NULL;
6128
6129 if (opcode == OP_ONCE)
6130 {
6131 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6132 {
6133 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6134 /* TMP2 which is set here used by OP_KETRMAX below. */
6135 if (ket == OP_KETRMAX)
6136 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6137 else if (ket == OP_KETRMIN)
6138 {
6139 /* Move the STR_PTR to the private_data_ptr. */
6140 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6141 }
6142 }
6143 else
6144 {
6145 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
6146 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_sw));
6147 if (ket == OP_KETRMAX)
6148 {
6149 /* TMP2 which is set here used by OP_KETRMAX below. */
6150 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6151 }
6152 }
6153 }
6154
6155 stacksize = 0;
6156 if (ket != OP_KET || bra != OP_BRA)
6157 stacksize++;
6158 if (offset != 0)
6159 {
6160 if (common->capture_last_ptr != 0)
6161 stacksize++;
6162 if (common->optimized_cbracket[offset >> 1] == 0)
6163 stacksize += 2;
6164 }
6165 if (has_alternatives && opcode != OP_ONCE)
6166 stacksize++;
6167
6168 if (stacksize > 0)
6169 allocate_stack(common, stacksize);
6170
6171 stacksize = 0;
6172 if (ket != OP_KET || bra != OP_BRA)
6173 {
6174 if (ket != OP_KET)
6175 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6176 else
6177 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6178 stacksize++;
6179 }
6180
6181 if (offset != 0)
6182 {
6183 if (common->capture_last_ptr != 0)
6184 {
6185 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6186 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6187 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0);
6188 stacksize++;
6189 }
6190 if (common->optimized_cbracket[offset >> 1] == 0)
6191 {
6192 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6193 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6194 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6195 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6196 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6197 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6198 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6199 stacksize += 2;
6200 }
6201 }
6202
6203 if (has_alternatives)
6204 {
6205 if (opcode != OP_ONCE)
6206 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6207 if (ket != OP_KETRMAX)
6208 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6209 }
6210
6211 /* Must be after the matchingpath label. */
6212 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6213 {
6214 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6215 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6216 }
6217
6218 if (ket == OP_KETRMAX)
6219 {
6220 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6221 {
6222 if (has_alternatives)
6223 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6224 /* Checking zero-length iteration. */
6225 if (opcode != OP_ONCE)
6226 {
6227 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
6228 /* Drop STR_PTR for greedy plus quantifier. */
6229 if (bra != OP_BRAZERO)
6230 free_stack(common, 1);
6231 }
6232 else
6233 /* TMP2 must contain the starting STR_PTR. */
6234 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
6235 }
6236 else
6237 JUMPTO(SLJIT_JUMP, rmaxlabel);
6238 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6239 }
6240
6241 if (bra == OP_BRAZERO)
6242 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
6243
6244 if (bra == OP_BRAMINZERO)
6245 {
6246 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
6247 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6248 if (braminzerojump != NULL)
6249 {
6250 JUMPHERE(braminzerojump);
6251 /* We need to release the end pointer to perform the
6252 backtrack for the zero-length iteration. When
6253 framesize is < 0, OP_ONCE will do the release itself. */
6254 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
6255 {
6256 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6257 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6258 }
6259 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6260 free_stack(common, 1);
6261 }
6262 /* Continue to the normal backtrack. */
6263 }
6264
6265 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
6266 decrease_call_count(common);
6267
6268 /* Skip the other alternatives. */
6269 while (*cc == OP_ALT)
6270 cc += GET(cc, 1);
6271 cc += 1 + LINK_SIZE;
6272 return cc;
6273 }
6274
6275 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6276 {
6277 DEFINE_COMPILER;
6278 backtrack_common *backtrack;
6279 pcre_uchar opcode;
6280 int private_data_ptr;
6281 int cbraprivptr = 0;
6282 int framesize;
6283 int stacksize;
6284 int offset = 0;
6285 BOOL zero = FALSE;
6286 pcre_uchar *ccbegin = NULL;
6287 int stack;
6288 struct sljit_label *loop = NULL;
6289 struct jump_list *emptymatch = NULL;
6290
6291 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
6292 if (*cc == OP_BRAPOSZERO)
6293 {
6294 zero = TRUE;
6295 cc++;
6296 }
6297
6298 opcode = *cc;
6299 private_data_ptr = PRIVATE_DATA(cc);
6300 SLJIT_ASSERT(private_data_ptr != 0);
6301 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
6302 switch(opcode)
6303 {
6304 case OP_BRAPOS:
6305 case OP_SBRAPOS:
6306 ccbegin = cc + 1 + LINK_SIZE;
6307 break;
6308
6309 case OP_CBRAPOS:
6310 case OP_SCBRAPOS:
6311 offset = GET2(cc, 1 + LINK_SIZE);
6312 /* This case cannot be optimized in the same was as
6313 normal capturing brackets. */
6314 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6315 cbraprivptr = OVECTOR_PRIV(offset);
6316 offset <<= 1;
6317 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6318 break;
6319
6320 default:
6321 SLJIT_ASSERT_STOP();
6322 break;
6323 }
6324
6325 framesize = get_framesize(common, cc, FALSE);
6326 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6327 if (framesize < 0)
6328 {
6329 if (offset != 0)
6330 {
6331 stacksize = 2;
6332 if (common->capture_last_ptr != 0)
6333 stacksize++;
6334 }
6335 else
6336 stacksize = 1;
6337
6338 if (!zero)
6339 stacksize++;
6340
6341 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6342 allocate_stack(common, stacksize);
6343 if (framesize == no_frame)
6344 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6345
6346 if (offset != 0)
6347 {
6348 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6349 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6350 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6351 if (common->capture_last_ptr != 0)
6352 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6353 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6354 if (common->capture_last_ptr != 0)
6355 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6356 }
6357 else
6358 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6359
6360 if (!zero)
6361 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
6362 }
6363 else
6364 {
6365 stacksize = framesize + 1;
6366 if (!zero)
6367 stacksize++;
6368 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6369 stacksize++;
6370 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6371
6372 allocate_stack(common, stacksize);
6373 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6374 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
6375 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6376
6377 stack = 0;
6378 if (!zero)
6379 {
6380 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
6381 stack++;
6382 }
6383 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6384 {
6385 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
6386 stack++;
6387 }
6388 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
6389 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
6390 }
6391
6392 if (offset != 0)
6393 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6394
6395 loop = LABEL();
6396 while (*cc != OP_KETRPOS)
6397 {
6398 backtrack->top = NULL;
6399 backtrack->topbacktracks = NULL;
6400 cc += GET(cc, 1);
6401
6402 compile_matchingpath(common, ccbegin, cc, backtrack);
6403 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6404 return NULL;
6405
6406 if (framesize < 0)
6407 {
6408 if (framesize == no_frame)
6409 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6410
6411 if (offset != 0)
6412 {
6413 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6414 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6415 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6416 if (common->capture_last_ptr != 0)
6417 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6418 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6419 }
6420 else
6421 {
6422 if (opcode == OP_SBRAPOS)
6423 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6424 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6425 }
6426
6427 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6428 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6429
6430 if (!zero)
6431 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6432 }
6433 else
6434 {
6435 if (offset != 0)
6436 {
6437 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6438 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6439 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6440 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6441 if (common->capture_last_ptr != 0)
6442 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6443 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6444 }
6445 else
6446 {
6447 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6448 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6449 if (opcode == OP_SBRAPOS)
6450 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6451 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
6452 }
6453
6454 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6455 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6456
6457 if (!zero)
6458 {
6459 if (framesize < 0)
6460 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6461 else
6462 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6463 }
6464 }
6465 JUMPTO(SLJIT_JUMP, loop);
6466 flush_stubs(common);
6467
6468 compile_backtrackingpath(common, backtrack->top);
6469 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6470 return NULL;
6471 set_jumps(backtrack->topbacktracks, LABEL());
6472
6473 if (framesize < 0)
6474 {
6475 if (offset != 0)
6476 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6477 else
6478 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6479 }
6480 else
6481 {
6482 if (offset != 0)
6483 {
6484 /* Last alternative. */
6485 if (*cc == OP_KETRPOS)
6486 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6487 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6488 }
6489 else
6490 {
6491 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6492 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6493 }
6494 }
6495
6496 if (*cc == OP_KETRPOS)
6497 break;
6498 ccbegin = cc + 1 + LINK_SIZE;
6499 }
6500
6501 backtrack->topbacktracks = NULL;
6502 if (!zero)
6503 {
6504 if (framesize < 0)
6505 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
6506 else /* TMP2 is set to [private_data_ptr] above. */
6507 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
6508 }
6509
6510 /* None of them matched. */
6511 set_jumps(emptymatch, LABEL());
6512 decrease_call_count(common);
6513 return cc + 1 + LINK_SIZE;
6514 }
6515
6516 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
6517 {
6518 int class_len;
6519
6520 *opcode = *cc;
6521 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
6522 {
6523 cc++;
6524 *type = OP_CHAR;
6525 }
6526 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
6527 {
6528 cc++;
6529 *type = OP_CHARI;
6530 *opcode -= OP_STARI - OP_STAR;
6531 }
6532 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
6533 {
6534 cc++;
6535 *type = OP_NOT;
6536 *opcode -= OP_NOTSTAR - OP_STAR;
6537 }
6538 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
6539 {
6540 cc++;
6541 *type = OP_NOTI;
6542 *opcode -= OP_NOTSTARI - OP_STAR;
6543 }
6544 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
6545 {
6546 cc++;
6547 *opcode -= OP_TYPESTAR - OP_STAR;
6548 *type = 0;
6549 }
6550 else
6551 {
6552 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
6553 *type = *opcode;
6554 cc++;
6555 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
6556 *opcode = cc[class_len - 1];
6557 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
6558 {
6559 *opcode -= OP_CRSTAR - OP_STAR;
6560 if (end != NULL)
6561 *end = cc + class_len;
6562 }
6563 else
6564 {
6565 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
6566 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
6567 *arg2 = GET2(cc, class_len);
6568
6569 if (*arg2 == 0)
6570 {
6571 SLJIT_ASSERT(*arg1 != 0);
6572 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
6573 }
6574 if (*arg1 == *arg2)
6575 *opcode = OP_EXACT;
6576
6577 if (end != NULL)
6578 *end = cc + class_len + 2 * IMM2_SIZE;
6579 }
6580 return cc;
6581 }
6582
6583 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
6584 {
6585 *arg1 = GET2(cc, 0);
6586 cc += IMM2_SIZE;
6587 }
6588
6589 if (*type == 0)
6590 {
6591 *type = *cc;
6592 if (end != NULL)
6593 *end = next_opcode(common, cc);
6594 cc++;
6595 return cc;
6596 }
6597
6598 if (end != NULL)
6599 {
6600 *end = cc + 1;
6601 #ifdef SUPPORT_UTF
6602 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
6603 #endif
6604 }
6605 return cc;
6606 }
6607
6608 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6609 {
6610 DEFINE_COMPILER;
6611 backtrack_common *backtrack;
6612 pcre_uchar opcode;
6613 pcre_uchar type;
6614 int arg1 = -1, arg2 = -1;
6615 pcre_uchar* end;
6616 jump_list *nomatch = NULL;
6617 struct sljit_jump *jump = NULL;
6618 struct sljit_label *label;
6619 int private_data_ptr = PRIVATE_DATA(cc);
6620 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6621 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
6622 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
6623 int tmp_base, tmp_offset;
6624
6625 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6626
6627 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
6628
6629 switch(type)
6630 {
6631 case OP_NOT_DIGIT:
6632 case OP_DIGIT:
6633 case OP_NOT_WHITESPACE:
6634 case OP_WHITESPACE:
6635 case OP_NOT_WORDCHAR:
6636 case OP_WORDCHAR:
6637 case OP_ANY:
6638 case OP_ALLANY:
6639 case OP_ANYBYTE:
6640 case OP_ANYNL:
6641 case OP_NOT_HSPACE:
6642 case OP_HSPACE:
6643 case OP_NOT_VSPACE:
6644 case OP_VSPACE:
6645 case OP_CHAR:
6646 case OP_CHARI:
6647 case OP_NOT:
6648 case OP_NOTI:
6649 case OP_CLASS:
6650 case OP_NCLASS:
6651 tmp_base = TMP3;
6652 tmp_offset = 0;
6653 break;
6654
6655 default:
6656 SLJIT_ASSERT_STOP();
6657 /* Fall through. */
6658
6659 case OP_EXTUNI:
6660 case OP_XCLASS:
6661 case OP_NOTPROP:
6662 case OP_PROP:
6663 tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
6664 tmp_offset = POSSESSIVE0;
6665 break;
6666 }
6667
6668 switch(opcode)
6669 {
6670 case OP_STAR:
6671 case OP_PLUS:
6672 case OP_UPTO:
6673 case OP_CRRANGE:
6674 if (type == OP_ANYNL || type == OP_EXTUNI)
6675 {
6676 SLJIT_ASSERT(private_data_ptr == 0);
6677 if (opcode == OP_STAR || opcode == OP_UPTO)
6678 {
6679 allocate_stack(common, 2);
6680 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6681 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6682 }
6683 else
6684 {
6685 allocate_stack(common, 1);
6686 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6687 }
6688
6689 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6690 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6691
6692 label = LABEL();
6693 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6694 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6695 {
6696 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6697 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6698 if (opcode == OP_CRRANGE && arg2 > 0)
6699 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
6700 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
6701 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
6702 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6703 }
6704
6705 /* We cannot use TMP3 because of this allocate_stack. */
6706 allocate_stack(common, 1);
6707 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6708 JUMPTO(SLJIT_JUMP, label);
6709 if (jump != NULL)
6710 JUMPHERE(jump);
6711 }
6712 else
6713 {
6714 if (opcode == OP_PLUS)
6715 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6716 if (private_data_ptr == 0)
6717 allocate_stack(common, 2);
6718 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6719 if (opcode <= OP_PLUS)
6720 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
6721 else
6722 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6723 label = LABEL();
6724 compile_char1_matchingpath(common, type, cc, &nomatch);
6725 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6726 if (opcode <= OP_PLUS)
6727 JUMPTO(SLJIT_JUMP, label);
6728 else if (opcode == OP_CRRANGE && arg1 == 0)
6729 {
6730 OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
6731 JUMPTO(SLJIT_JUMP, label);
6732 }
6733 else
6734 {
6735 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6736 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6737 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6738 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
6739 }
6740 set_jumps(nomatch, LABEL());
6741 if (opcode == OP_CRRANGE)
6742 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
6743 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6744 }
6745 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6746 break;