/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1278 - (show annotations)
Tue Mar 12 06:15:04 2013 UTC (6 years, 7 months ago) by zherczeg
File MIME type: text/plain
File size: 294856 byte(s)
Error occurred while calculating annotation data.
Support for SKIP with argument is added to the JIT compiler. Control verb support is still experimental.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows how the code is generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block passed to a jit_function (typedef'd later in this file),
whose only parameter is a pointer to this structure. The member order is
deliberate: all pointers come first, the integer and byte members follow. */
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 int real_offset_count;
172 int offset_count;
173 int call_limit;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
/* Holds one function pointer and one size for each of the
JIT_NUMBER_OF_COMPILE_MODES compile modes (both arrays are indexed the same
way), plus a callback, a userdata pointer and the top_bracket value. */
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 PUBL(jit_callback) callback;
183 void *userdata;
184 pcre_uint32 top_bracket;
185 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
186 } executable_functions;
187
/* Node of a singly linked list of sljit jumps (linked through next). */
188 typedef struct jump_list {
189 struct sljit_jump *jump;
190 struct jump_list *next;
191 } jump_list;
192
/* Node of a singly linked list; each node pairs a start jump with a quit
label. */
193 typedef struct stub_list {
194 struct sljit_jump *start;
195 struct sljit_label *quit;
196 struct stub_list *next;
197 } stub_list;
198
/* Negative sentinel values. The comment on get_framesize in this file says
it returns a frame_types value (always < 0) when no frame is needed. */
199 enum frame_types {
200 no_frame = -1,
201 no_stack = -2
202 };
203
/* Numeric identifiers (0..4) for the control verb kinds: commit, prune,
skip, skip with argument and mark. */
204 enum control_types {
205 type_commit = 0,
206 type_prune = 1,
207 type_skip = 2,
208 type_skip_arg = 3,
209 type_mark = 4
210 };
211
/* Function pointer type using the SLJIT_CALL calling convention: takes a
jit_arguments pointer and returns an int. */
212 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
213
214 /* The following structure is the key data type for the recursive
215 code generator. It is allocated by compile_matchingpath, and contains
216 the arguments for compile_backtrackingpath. Must be the first member
217 of its descendants. */
218 typedef struct backtrack_common {
219 /* Concatenation stack. */
220 struct backtrack_common *prev;
221 jump_list *nextbacktracks;
222 /* Internal stack (for component operators). */
223 struct backtrack_common *top;
224 jump_list *topbacktracks;
225 /* Opcode pointer. */
226 pcre_uchar *cc;
227 } backtrack_common;
228
/* Backtrack data that extends backtrack_common (kept as the first member). */
229 typedef struct assert_backtrack {
230 backtrack_common common;
231 jump_list *condfailed;
232 /* Less than 0 if a frame is not needed. */
233 int framesize;
234 /* Points to our private memory word on the stack. */
235 int private_data_ptr;
236 /* For iterators. */
237 struct sljit_label *matchingpath;
238 } assert_backtrack;
239
/* Backtrack data that extends backtrack_common (kept as the first member).
The union u holds one of three things; the member comments say which opcodes
use which. */
240 typedef struct bracket_backtrack {
241 backtrack_common common;
242 /* Where to continue if an alternative is successfully matched. */
243 struct sljit_label *alternative_matchingpath;
244 /* For rmin and rmax iterators. */
245 struct sljit_label *recursive_matchingpath;
246 /* For greedy ? operator. */
247 struct sljit_label *zero_matchingpath;
248 /* Contains the branches of a failed condition. */
249 union {
250 /* Both for OP_COND, OP_SCOND. */
251 jump_list *condfailed;
252 assert_backtrack *assert;
253 /* For OP_ONCE. Less than 0 if not needed. */
254 int framesize;
255 } u;
256 /* Points to our private memory word on the stack. */
257 int private_data_ptr;
258 } bracket_backtrack;
259
/* Backtrack data that extends backtrack_common (kept as the first member). */
260 typedef struct bracketpos_backtrack {
261 backtrack_common common;
262 /* Points to our private memory word on the stack. */
263 int private_data_ptr;
264 /* Reverting stack is needed. */
265 int framesize;
266 /* Allocated stack size. */
267 int stacksize;
268 } bracketpos_backtrack;
269
/* Backtrack data that extends backtrack_common (kept as the first member)
with a single matching path label. */
270 typedef struct braminzero_backtrack {
271 backtrack_common common;
272 struct sljit_label *matchingpath;
273 } braminzero_backtrack;
274
/* Backtrack data that extends backtrack_common (kept as the first member)
with the label of the next iteration. */
275 typedef struct iterator_backtrack {
276 backtrack_common common;
277 /* Next iteration. */
278 struct sljit_label *matchingpath;
279 } iterator_backtrack;
280
/* Node of a singly linked list (linked through next). Each node records a
function entry label, the jumps collected while that label is not yet
available, and the starting opcode. */
281 typedef struct recurse_entry {
282 struct recurse_entry *next;
283 /* Contains the function entry. */
284 struct sljit_label *entry;
285 /* Collects the calls until the function is not created. */
286 jump_list *calls;
287 /* Points to the starting opcode. */
288 int start;
289 } recurse_entry;
290
/* Backtrack data that extends backtrack_common (kept as the first member)
with a flag named inlined_pattern. */
291 typedef struct recurse_backtrack {
292 backtrack_common common;
293 BOOL inlined_pattern;
294 } recurse_backtrack;
295
/* Sizes the digits[] array of compiler_common (2 + MAX_RANGE_SIZE entries). */
296 #define MAX_RANGE_SIZE 6
297
/* State shared by the code generator functions; they receive it as their
"common" argument. get_private_data_length() assigns recursive_head_ptr,
capture_last_ptr and mark_ptr the current ovector_start value on first use,
so for those members 0 means "not assigned yet". */
298 typedef struct compiler_common {
299 /* The sljit generic compiler. */
300 struct sljit_compiler *compiler;
301 /* First byte code. */
302 pcre_uchar *start;
303 /* Maps private data offset to each opcode. */
304 int *private_data_ptrs;
305 /* Tells whether the capturing bracket is optimized. */
306 pcre_uint8 *optimized_cbracket;
307 /* Starting offset of private data for capturing brackets. */
308 int cbra_ptr;
309 /* Output vector starting point. Must be divisible by 2. */
310 int ovector_start;
311 /* Last known position of the requested byte. */
312 int req_char_ptr;
313 /* Head of the last recursion. */
314 int recursive_head_ptr;
315 /* First inspected character for partial matching. */
316 int start_used_ptr;
317 /* Starting pointer for partial soft matches. */
318 int hit_start;
319 /* End pointer of the first line. */
320 int first_line_end;
321 /* Points to the marked string. */
322 int mark_ptr;
323 /* Recursive control verb management chain. */
324 int control_head_ptr;
325 /* Points to the last matched capture block index. */
326 int capture_last_ptr;
327 /* Points to the starting position of the current match. */
328 int start_ptr;
329
330 /* Flipped and lower case tables. */
331 const pcre_uint8 *fcc;
332 sljit_sw lcc;
333 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
334 int mode;
335 /* \K is in the pattern. */
336 BOOL has_set_som;
337 /* (*SKIP:arg) is in the pattern. */
338 BOOL has_skip_arg;
339 /* Needs to know the start position anytime. */
340 BOOL needs_start_ptr;
341 /* Currently in recurse or assert. */
342 BOOL local_exit;
343 /* Newline control. */
344 int nltype;
345 int newline;
346 int bsr_nltype;
347 /* Dollar endonly. */
348 int endonly;
349 /* Tables. */
350 sljit_sw ctypes;
351 int digits[2 + MAX_RANGE_SIZE];
352 /* Named capturing brackets. */
353 sljit_uw name_table;
354 sljit_sw name_count;
355 sljit_sw name_entry_size;
356
357 /* Labels and jump lists. */
358 struct sljit_label *partialmatchlabel;
359 struct sljit_label *quit_label;
360 struct sljit_label *forced_quit_label;
361 struct sljit_label *accept_label;
362 stub_list *stubs;
363 recurse_entry *entries;
364 recurse_entry *currententry;
365 jump_list *partialmatch;
366 jump_list *quit;
367 jump_list *forced_quit;
368 jump_list *accept;
369 jump_list *calllimit;
370 jump_list *stackalloc;
371 jump_list *revertframes;
372 jump_list *wordboundary;
373 jump_list *anynewline;
374 jump_list *hspace;
375 jump_list *vspace;
376 jump_list *casefulcmp;
377 jump_list *caselesscmp;
378 jump_list *reset_match;
379 BOOL jscript_compat;
380 #ifdef SUPPORT_UTF
381 BOOL utf;
382 #ifdef SUPPORT_UCP
383 BOOL use_ucp;
384 #endif
385 #ifndef COMPILE_PCRE32
386 jump_list *utfreadchar;
387 #endif
388 #ifdef COMPILE_PCRE8
389 jump_list *utfreadtype8;
390 #endif
391 #endif /* SUPPORT_UTF */
392 #ifdef SUPPORT_UCP
393 jump_list *getucd;
394 #endif
395 } compiler_common;
396
397 /* For byte_sequence_compare. */
398
/* Context with a length and a source register. When SLJIT_UNALIGNED is
enabled it also carries ucharptr and two unions, c and oc, with identical
layout: each can be viewed as an int, an unsigned short, or an array of
character units whose element count depends on the compiled character width
(4 for 8 bit, 2 for 16 bit, 1 for 32 bit). */
399 typedef struct compare_context {
400 int length;
401 int sourcereg;
402 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
403 int ucharptr;
404 union {
405 sljit_si asint;
406 sljit_uh asushort;
407 #if defined COMPILE_PCRE8
408 sljit_ub asbyte;
409 sljit_ub asuchars[4];
410 #elif defined COMPILE_PCRE16
411 sljit_uh asuchars[2];
412 #elif defined COMPILE_PCRE32
413 sljit_ui asuchars[1];
414 #endif
415 } c;
416 union {
417 sljit_si asint;
418 sljit_uh asushort;
419 #if defined COMPILE_PCRE8
420 sljit_ub asbyte;
421 sljit_ub asuchars[4];
422 #elif defined COMPILE_PCRE16
423 sljit_uh asuchars[2];
424 #elif defined COMPILE_PCRE32
425 sljit_ui asuchars[1];
426 #endif
427 } oc;
428 #endif
429 } compare_context;
430
431 /* Undefine sljit macros. */
432 #undef CMP
433
434 /* Used for accessing the elements of the stack. */
/* Slot i maps to a negative byte offset: STACK(0) is -sizeof(sljit_sw),
STACK(1) is -2 * sizeof(sljit_sw), and so on. */
435 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
436
/* Symbolic names for the sljit registers used by this file. TMP3 and
RETURN_ADDR map to SLJIT_TEMPORARY_EREG*, ARGUMENTS and CALL_COUNT to
SLJIT_SAVED_EREG*; the remaining names map to the plain scratch and saved
registers. */
437 #define TMP1 SLJIT_SCRATCH_REG1
438 #define TMP2 SLJIT_SCRATCH_REG3
439 #define TMP3 SLJIT_TEMPORARY_EREG2
440 #define STR_PTR SLJIT_SAVED_REG1
441 #define STR_END SLJIT_SAVED_REG2
442 #define STACK_TOP SLJIT_SCRATCH_REG2
443 #define STACK_LIMIT SLJIT_SAVED_REG3
444 #define ARGUMENTS SLJIT_SAVED_EREG1
445 #define CALL_COUNT SLJIT_SAVED_EREG2
446 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
447
448 /* Local space layout. */
/* The first five words have fixed offsets (0 to 4 words). OVECTOR_START,
OVECTOR, OVECTOR_PRIV and PRIVATE_DATA expand to expressions that read a
variable named "common", so they can only be used where one is in scope. */
449 /* These two locals can be used by the current opcode. */
450 #define LOCALS0 (0 * sizeof(sljit_sw))
451 #define LOCALS1 (1 * sizeof(sljit_sw))
452 /* Two local variables for possessive quantifiers (char1 cannot use them). */
453 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
454 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
455 /* Max limit of recursions. */
456 #define CALL_LIMIT (4 * sizeof(sljit_sw))
457 /* The output vector is stored on the stack, and contains pointers
458 to characters. The vector data is divided into two groups: the first
459 group contains the start / end character pointers, and the second is
460 the start pointers when the end of the capturing group has not yet been reached. */
461 #define OVECTOR_START (common->ovector_start)
462 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw))
463 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * sizeof(sljit_sw))
/* Indexed by the distance of the opcode pointer from common->start. */
464 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
465
/* Selects the sljit move opcodes that match the compiled character width:
UB for COMPILE_PCRE8, UH for COMPILE_PCRE16, UI for COMPILE_PCRE32. The build
is stopped with #error when none of the three is defined. */
466 #if defined COMPILE_PCRE8
467 #define MOV_UCHAR SLJIT_MOV_UB
468 #define MOVU_UCHAR SLJIT_MOVU_UB
469 #elif defined COMPILE_PCRE16
470 #define MOV_UCHAR SLJIT_MOV_UH
471 #define MOVU_UCHAR SLJIT_MOVU_UH
472 #elif defined COMPILE_PCRE32
473 #define MOV_UCHAR SLJIT_MOV_UI
474 #define MOVU_UCHAR SLJIT_MOVU_UI
475 #else
476 #error Unsupported compiling mode
477 #endif
478
479 /* Shortcuts. */
/* DEFINE_COMPILER declares a local variable "compiler" initialised from
common->compiler. Every other macro below forwards to an sljit function and
passes that "compiler" variable as the first argument, so each requires
DEFINE_COMPILER (or an equivalent declaration) in the enclosing scope. */
480 #define DEFINE_COMPILER \
481 struct sljit_compiler *compiler = common->compiler
482 #define OP1(op, dst, dstw, src, srcw) \
483 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
484 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
485 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
486 #define LABEL() \
487 sljit_emit_label(compiler)
488 #define JUMP(type) \
489 sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an already existing label. */
490 #define JUMPTO(type, label) \
491 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emits a new label at the current position and binds the jump to it. */
492 #define JUMPHERE(jump) \
493 sljit_set_label((jump), sljit_emit_label(compiler))
494 #define SET_LABEL(jump, label) \
495 sljit_set_label((jump), (label))
496 #define CMP(type, src1, src1w, src2, src2w) \
497 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare and binds the resulting jump to an existing label. */
498 #define CMPTO(type, src1, src1w, src2, src2w, label) \
499 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
500 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
501 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
502 #define GET_LOCAL_BASE(dst, dstw, offset) \
503 sljit_get_local_base(compiler, (dst), (dstw), (offset))
504
505 static pcre_uchar* bracketend(pcre_uchar* cc)
506 {
507 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
508 do cc += GET(cc, 1); while (*cc == OP_ALT);
509 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
510 cc += 1 + LINK_SIZE;
511 return cc;
512 }
513
514 /* Functions which might need modification for each newly supported opcode:
515 next_opcode
516 get_private_data_length
517 set_private_data_ptrs
518 get_framesize
519 init_frame
520 get_private_data_copy_length
521 copy_private_data
522 compile_matchingpath
523 compile_backtrackingpath
524 */
525
526 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
527 {
528 SLJIT_UNUSED_ARG(common);
529 switch(*cc)
530 {
531 case OP_SOD:
532 case OP_SOM:
533 case OP_SET_SOM:
534 case OP_NOT_WORD_BOUNDARY:
535 case OP_WORD_BOUNDARY:
536 case OP_NOT_DIGIT:
537 case OP_DIGIT:
538 case OP_NOT_WHITESPACE:
539 case OP_WHITESPACE:
540 case OP_NOT_WORDCHAR:
541 case OP_WORDCHAR:
542 case OP_ANY:
543 case OP_ALLANY:
544 case OP_NOTPROP:
545 case OP_PROP:
546 case OP_ANYNL:
547 case OP_NOT_HSPACE:
548 case OP_HSPACE:
549 case OP_NOT_VSPACE:
550 case OP_VSPACE:
551 case OP_EXTUNI:
552 case OP_EODN:
553 case OP_EOD:
554 case OP_CIRC:
555 case OP_CIRCM:
556 case OP_DOLL:
557 case OP_DOLLM:
558 case OP_CRSTAR:
559 case OP_CRMINSTAR:
560 case OP_CRPLUS:
561 case OP_CRMINPLUS:
562 case OP_CRQUERY:
563 case OP_CRMINQUERY:
564 case OP_CRRANGE:
565 case OP_CRMINRANGE:
566 case OP_CLASS:
567 case OP_NCLASS:
568 case OP_REF:
569 case OP_REFI:
570 case OP_RECURSE:
571 case OP_CALLOUT:
572 case OP_ALT:
573 case OP_KET:
574 case OP_KETRMAX:
575 case OP_KETRMIN:
576 case OP_KETRPOS:
577 case OP_REVERSE:
578 case OP_ASSERT:
579 case OP_ASSERT_NOT:
580 case OP_ASSERTBACK:
581 case OP_ASSERTBACK_NOT:
582 case OP_ONCE:
583 case OP_ONCE_NC:
584 case OP_BRA:
585 case OP_BRAPOS:
586 case OP_CBRA:
587 case OP_CBRAPOS:
588 case OP_COND:
589 case OP_SBRA:
590 case OP_SBRAPOS:
591 case OP_SCBRA:
592 case OP_SCBRAPOS:
593 case OP_SCOND:
594 case OP_CREF:
595 case OP_NCREF:
596 case OP_RREF:
597 case OP_NRREF:
598 case OP_DEF:
599 case OP_BRAZERO:
600 case OP_BRAMINZERO:
601 case OP_BRAPOSZERO:
602 case OP_PRUNE:
603 case OP_SKIP:
604 case OP_COMMIT:
605 case OP_FAIL:
606 case OP_ACCEPT:
607 case OP_ASSERT_ACCEPT:
608 case OP_CLOSE:
609 case OP_SKIPZERO:
610 return cc + PRIV(OP_lengths)[*cc];
611
612 case OP_CHAR:
613 case OP_CHARI:
614 case OP_NOT:
615 case OP_NOTI:
616 case OP_STAR:
617 case OP_MINSTAR:
618 case OP_PLUS:
619 case OP_MINPLUS:
620 case OP_QUERY:
621 case OP_MINQUERY:
622 case OP_UPTO:
623 case OP_MINUPTO:
624 case OP_EXACT:
625 case OP_POSSTAR:
626 case OP_POSPLUS:
627 case OP_POSQUERY:
628 case OP_POSUPTO:
629 case OP_STARI:
630 case OP_MINSTARI:
631 case OP_PLUSI:
632 case OP_MINPLUSI:
633 case OP_QUERYI:
634 case OP_MINQUERYI:
635 case OP_UPTOI:
636 case OP_MINUPTOI:
637 case OP_EXACTI:
638 case OP_POSSTARI:
639 case OP_POSPLUSI:
640 case OP_POSQUERYI:
641 case OP_POSUPTOI:
642 case OP_NOTSTAR:
643 case OP_NOTMINSTAR:
644 case OP_NOTPLUS:
645 case OP_NOTMINPLUS:
646 case OP_NOTQUERY:
647 case OP_NOTMINQUERY:
648 case OP_NOTUPTO:
649 case OP_NOTMINUPTO:
650 case OP_NOTEXACT:
651 case OP_NOTPOSSTAR:
652 case OP_NOTPOSPLUS:
653 case OP_NOTPOSQUERY:
654 case OP_NOTPOSUPTO:
655 case OP_NOTSTARI:
656 case OP_NOTMINSTARI:
657 case OP_NOTPLUSI:
658 case OP_NOTMINPLUSI:
659 case OP_NOTQUERYI:
660 case OP_NOTMINQUERYI:
661 case OP_NOTUPTOI:
662 case OP_NOTMINUPTOI:
663 case OP_NOTEXACTI:
664 case OP_NOTPOSSTARI:
665 case OP_NOTPOSPLUSI:
666 case OP_NOTPOSQUERYI:
667 case OP_NOTPOSUPTOI:
668 cc += PRIV(OP_lengths)[*cc];
669 #ifdef SUPPORT_UTF
670 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
671 #endif
672 return cc;
673
674 /* Special cases. */
675 case OP_TYPESTAR:
676 case OP_TYPEMINSTAR:
677 case OP_TYPEPLUS:
678 case OP_TYPEMINPLUS:
679 case OP_TYPEQUERY:
680 case OP_TYPEMINQUERY:
681 case OP_TYPEUPTO:
682 case OP_TYPEMINUPTO:
683 case OP_TYPEEXACT:
684 case OP_TYPEPOSSTAR:
685 case OP_TYPEPOSPLUS:
686 case OP_TYPEPOSQUERY:
687 case OP_TYPEPOSUPTO:
688 return cc + PRIV(OP_lengths)[*cc] - 1;
689
690 case OP_ANYBYTE:
691 #ifdef SUPPORT_UTF
692 if (common->utf) return NULL;
693 #endif
694 return cc + 1;
695
696 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
697 case OP_XCLASS:
698 return cc + GET(cc, 1);
699 #endif
700
701 case OP_MARK:
702 case OP_PRUNE_ARG:
703 case OP_SKIP_ARG:
704 return cc + 1 + 2 + cc[1];
705
706 default:
707 return NULL;
708 }
709 }
710
/* Groups of case labels meant to be placed inside a switch statement. Both
get_private_data_length() and set_private_data_ptrs() use them. The numeric
suffix matches the "space" value those functions assign: 1 word for the _1
groups, 2 words for the _2A and _2B groups. */

/* space = 1, size = -2 in the users of this macro. */
711 #define CASE_ITERATOR_PRIVATE_DATA_1 \
712 case OP_MINSTAR: \
713 case OP_MINPLUS: \
714 case OP_QUERY: \
715 case OP_MINQUERY: \
716 case OP_MINSTARI: \
717 case OP_MINPLUSI: \
718 case OP_QUERYI: \
719 case OP_MINQUERYI: \
720 case OP_NOTMINSTAR: \
721 case OP_NOTMINPLUS: \
722 case OP_NOTQUERY: \
723 case OP_NOTMINQUERY: \
724 case OP_NOTMINSTARI: \
725 case OP_NOTMINPLUSI: \
726 case OP_NOTQUERYI: \
727 case OP_NOTMINQUERYI:
728
/* space = 2, size = -2 in the users of this macro. */
729 #define CASE_ITERATOR_PRIVATE_DATA_2A \
730 case OP_STAR: \
731 case OP_PLUS: \
732 case OP_STARI: \
733 case OP_PLUSI: \
734 case OP_NOTSTAR: \
735 case OP_NOTPLUS: \
736 case OP_NOTSTARI: \
737 case OP_NOTPLUSI:
738
/* space = 2, size = -(2 + IMM2_SIZE) in the users of this macro. */
739 #define CASE_ITERATOR_PRIVATE_DATA_2B \
740 case OP_UPTO: \
741 case OP_MINUPTO: \
742 case OP_UPTOI: \
743 case OP_MINUPTOI: \
744 case OP_NOTUPTO: \
745 case OP_NOTMINUPTO: \
746 case OP_NOTUPTOI: \
747 case OP_NOTMINUPTOI:
748
/* space = 1, size = 1 in the users of this macro. */
749 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
750 case OP_TYPEMINSTAR: \
751 case OP_TYPEMINPLUS: \
752 case OP_TYPEQUERY: \
753 case OP_TYPEMINQUERY:
754
/* size = 1; space = 2 unless cc[1] is OP_ANYNL or OP_EXTUNI. */
755 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
756 case OP_TYPESTAR: \
757 case OP_TYPEPLUS:
758
/* size = 1 + IMM2_SIZE; space = 2 unless cc[1 + IMM2_SIZE] is OP_ANYNL or
OP_EXTUNI. */
759 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
760 case OP_TYPEUPTO: \
761 case OP_TYPEMINUPTO:
762
763 static int get_class_iterator_size(pcre_uchar *cc)
764 {
765 switch(*cc)
766 {
767 case OP_CRSTAR:
768 case OP_CRPLUS:
769 return 2;
770
771 case OP_CRMINSTAR:
772 case OP_CRMINPLUS:
773 case OP_CRQUERY:
774 case OP_CRMINQUERY:
775 return 1;
776
777 case OP_CRRANGE:
778 case OP_CRMINRANGE:
779 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
780 return 0;
781 return 2;
782
783 default:
784 return 0;
785 }
786 }
787
/* Scans the opcodes in [cc, ccend) and returns the number of bytes of
private data they require (always a multiple of sizeof(sljit_sw)), or -1
when the pattern cannot be handled: next_opcode() returned NULL for an
opcode, or an OP_COND / OP_SCOND is immediately followed by OP_CALLOUT.

Side effects on common:
  - has_set_som, has_skip_arg and needs_start_ptr are set to TRUE and
    control_head_ptr is set to 1 when the corresponding opcodes are seen;
  - optimized_cbracket[] is cleared for every group that is referenced
    (OP_REF, OP_REFI, OP_CREF, OP_NCREF) or is an OP_CBRAPOS / OP_SCBRAPOS;
  - recursive_head_ptr, capture_last_ptr and mark_ptr receive the current
    ovector_start on first use, and ovector_start advances by one word. */
788 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
789 {
790 int private_data_length = 0;
791 pcre_uchar *alternative;
792 pcre_uchar *name;
793 pcre_uchar *end = NULL;
794 int space, size, i;
795 pcre_uint32 bracketlen;
796
797 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
798 while (cc < ccend)
799 {
/* Per opcode results of the switch: space is the number of private data
words for an iterator, size is how far to advance afterwards (negative
means: advance by -size and then skip UTF extra units), bracketlen is the
header length of a bracket opcode. Cases that leave all three at zero
advance cc themselves. */
800 space = 0;
801 size = 0;
802 bracketlen = 0;
803 switch(*cc)
804 {
805 case OP_SET_SOM:
806 common->has_set_som = TRUE;
807 cc += 1;
808 break;
809
810 case OP_REF:
811 case OP_REFI:
812 common->optimized_cbracket[GET2(cc, 1)] = 0;
813 cc += 1 + IMM2_SIZE;
814 break;
815
816 case OP_ASSERT:
817 case OP_ASSERT_NOT:
818 case OP_ASSERTBACK:
819 case OP_ASSERTBACK_NOT:
820 case OP_ONCE:
821 case OP_ONCE_NC:
822 case OP_BRAPOS:
823 case OP_SBRA:
824 case OP_SBRAPOS:
825 private_data_length += sizeof(sljit_sw);
826 bracketlen = 1 + LINK_SIZE;
827 break;
828
829 case OP_CBRAPOS:
830 case OP_SCBRAPOS:
831 private_data_length += sizeof(sljit_sw);
832 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
833 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
834 break;
835
836 case OP_COND:
837 case OP_SCOND:
838 /* Only AUTO_CALLOUT can insert this opcode. We do
839 not intend to support this case. */
840 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
841 return -1;
842
843 if (*cc == OP_COND)
844 {
845 /* Might be a hidden SCOND. */
846 alternative = cc + GET(cc, 1);
847 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
848 private_data_length += sizeof(sljit_sw);
849 }
850 else
851 private_data_length += sizeof(sljit_sw);
852 bracketlen = 1 + LINK_SIZE;
853 break;
854
855 case OP_CREF:
856 i = GET2(cc, 1);
857 common->optimized_cbracket[i] = 0;
858 cc += 1 + IMM2_SIZE;
859 break;
860
861 case OP_NCREF:
/* bracketlen is borrowed as a temporary for the referenced number; it is
reset to 0 before the case ends. The first loop locates the name table entry
with that number, the second clears optimized_cbracket[] for every entry
whose name equals the name of that entry. */
862 bracketlen = GET2(cc, 1);
863 name = (pcre_uchar *)common->name_table;
864 alternative = name;
865 for (i = 0; i < common->name_count; i++)
866 {
867 if (GET2(name, 0) == bracketlen) break;
868 name += common->name_entry_size;
869 }
870 SLJIT_ASSERT(i != common->name_count);
871
872 for (i = 0; i < common->name_count; i++)
873 {
874 if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
875 common->optimized_cbracket[GET2(alternative, 0)] = 0;
876 alternative += common->name_entry_size;
877 }
878 bracketlen = 0;
879 cc += 1 + IMM2_SIZE;
880 break;
881
882 case OP_BRA:
883 bracketlen = 1 + LINK_SIZE;
884 break;
885
886 case OP_CBRA:
887 case OP_SCBRA:
888 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
889 break;
890
891 CASE_ITERATOR_PRIVATE_DATA_1
892 space = 1;
893 size = -2;
894 break;
895
896 CASE_ITERATOR_PRIVATE_DATA_2A
897 space = 2;
898 size = -2;
899 break;
900
901 CASE_ITERATOR_PRIVATE_DATA_2B
902 space = 2;
903 size = -(2 + IMM2_SIZE);
904 break;
905
906 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
907 space = 1;
908 size = 1;
909 break;
910
911 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
912 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
913 space = 2;
914 size = 1;
915 break;
916
917 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
918 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
919 space = 2;
920 size = 1 + IMM2_SIZE;
921 break;
922
923 case OP_CLASS:
924 case OP_NCLASS:
/* size is 0 on entry, so this sets it to the opcode plus 32 bytes of data
expressed in pcre_uchar units; the repeat opcode, if any, follows there. */
925 size += 1 + 32 / sizeof(pcre_uchar);
926 space = get_class_iterator_size(cc + size);
927 break;
928
929 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
930 case OP_XCLASS:
931 size = GET(cc, 1);
932 space = get_class_iterator_size(cc + size);
933 break;
934 #endif
935
936 case OP_RECURSE:
937 /* Set its value only once. */
938 if (common->recursive_head_ptr == 0)
939 {
940 common->recursive_head_ptr = common->ovector_start;
941 common->ovector_start += sizeof(sljit_sw);
942 }
943 cc += 1 + LINK_SIZE;
944 break;
945
946 case OP_CALLOUT:
947 if (common->capture_last_ptr == 0)
948 {
949 common->capture_last_ptr = common->ovector_start;
950 common->ovector_start += sizeof(sljit_sw);
951 }
952 cc += 2 + 2 * LINK_SIZE;
953 break;
954
955 case OP_PRUNE_ARG:
956 common->needs_start_ptr = TRUE;
957 common->control_head_ptr = 1;
958 /* Fall through. */
959
960 case OP_MARK:
961 if (common->mark_ptr == 0)
962 {
963 common->mark_ptr = common->ovector_start;
964 common->ovector_start += sizeof(sljit_sw);
965 }
966 cc += 1 + 2 + cc[1];
967 break;
968
969 case OP_PRUNE:
970 case OP_SKIP:
971 common->needs_start_ptr = TRUE;
972 common->control_head_ptr = 1;
973 cc += 1;
974 break;
975
976 case OP_SKIP_ARG:
977 common->control_head_ptr = 1;
978 common->has_skip_arg = TRUE;
979 cc += 1 + 2 + cc[1];
980 break;
981
982 default:
983 cc = next_opcode(common, cc);
984 if (cc == NULL)
985 return -1;
986 break;
987 }
988
/* Iterator words are only counted while cc is not below "end". "end" is
non-NULL only while scanning inside a bracket whose closing opcode is not
a plain OP_KET (it is set in the bracketlen block below). */
989 if (space > 0 && cc >= end)
990 private_data_length += sizeof(sljit_sw) * space;
991
992 if (size != 0)
993 {
994 if (size < 0)
995 {
996 cc += -size;
997 #ifdef SUPPORT_UTF
998 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
999 #endif
1000 }
1001 else
1002 cc += size;
1003 }
1004
1005 if (bracketlen != 0)
1006 {
1007 if (cc >= end)
1008 {
/* Remember where this bracket ends, unless it is closed by a plain OP_KET,
in which case the marker is cleared. */
1009 end = bracketend(cc);
1010 if (end[-1 - LINK_SIZE] == OP_KET)
1011 end = NULL;
1012 }
1013 cc += bracketlen;
1014 }
1015 }
1016 return private_data_length;
1017 }
1018
/* Walks the opcodes from common->start up to ccend and stores, in
common->private_data_ptrs[] at the index of each opcode that needs private
data, the offset assigned to it. Offsets start at private_data_ptr and
advance by sizeof(sljit_sw) per word. The bracket and iterator cases use the
same space / size / bracketlen values as get_private_data_length(). Every
opcode is expected to be supported: a NULL result from next_opcode() only
triggers an assertion. */
1019 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
1020 {
1021 pcre_uchar *cc = common->start;
1022 pcre_uchar *alternative;
1023 pcre_uchar *end = NULL;
1024 int space, size, bracketlen;
1025
1026 while (cc < ccend)
1027 {
/* space: iterator words to assign; size: distance to advance afterwards
(negative means advance by -size, then skip UTF extra units); bracketlen:
header length of a bracket opcode. */
1028 space = 0;
1029 size = 0;
1030 bracketlen = 0;
1031 switch(*cc)
1032 {
1033 case OP_ASSERT:
1034 case OP_ASSERT_NOT:
1035 case OP_ASSERTBACK:
1036 case OP_ASSERTBACK_NOT:
1037 case OP_ONCE:
1038 case OP_ONCE_NC:
1039 case OP_BRAPOS:
1040 case OP_SBRA:
1041 case OP_SBRAPOS:
1042 case OP_SCOND:
1043 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1044 private_data_ptr += sizeof(sljit_sw);
1045 bracketlen = 1 + LINK_SIZE;
1046 break;
1047
1048 case OP_CBRAPOS:
1049 case OP_SCBRAPOS:
1050 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1051 private_data_ptr += sizeof(sljit_sw);
1052 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1053 break;
1054
1055 case OP_COND:
1056 /* Might be a hidden SCOND. */
1057 alternative = cc + GET(cc, 1);
1058 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1059 {
1060 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1061 private_data_ptr += sizeof(sljit_sw);
1062 }
1063 bracketlen = 1 + LINK_SIZE;
1064 break;
1065
1066 case OP_BRA:
1067 bracketlen = 1 + LINK_SIZE;
1068 break;
1069
1070 case OP_CBRA:
1071 case OP_SCBRA:
1072 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1073 break;
1074
1075 CASE_ITERATOR_PRIVATE_DATA_1
1076 space = 1;
1077 size = -2;
1078 break;
1079
1080 CASE_ITERATOR_PRIVATE_DATA_2A
1081 space = 2;
1082 size = -2;
1083 break;
1084
1085 CASE_ITERATOR_PRIVATE_DATA_2B
1086 space = 2;
1087 size = -(2 + IMM2_SIZE);
1088 break;
1089
1090 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1091 space = 1;
1092 size = 1;
1093 break;
1094
1095 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1096 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1097 space = 2;
1098 size = 1;
1099 break;
1100
1101 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1102 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1103 space = 2;
1104 size = 1 + IMM2_SIZE;
1105 break;
1106
1107 case OP_CLASS:
1108 case OP_NCLASS:
1109 size += 1 + 32 / sizeof(pcre_uchar);
1110 space = get_class_iterator_size(cc + size);
1111 break;
1112
1113 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1114 case OP_XCLASS:
1115 size = GET(cc, 1);
1116 space = get_class_iterator_size(cc + size);
1117 break;
1118 #endif
1119
1120 default:
1121 cc = next_opcode(common, cc);
1122 SLJIT_ASSERT(cc != NULL);
1123 break;
1124 }
1125
/* The offset is recorded at the index of the iterator opcode itself: cc has
not been advanced yet for the cases that set space. As in
get_private_data_length(), nothing is assigned while cc is below "end". */
1126 if (space > 0 && cc >= end)
1127 {
1128 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1129 private_data_ptr += sizeof(sljit_sw) * space;
1130 }
1131
1132 if (size != 0)
1133 {
1134 if (size < 0)
1135 {
1136 cc += -size;
1137 #ifdef SUPPORT_UTF
1138 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1139 #endif
1140 }
1141 else
1142 cc += size;
1143 }
1144
1145 if (bracketlen > 0)
1146 {
1147 if (cc >= end)
1148 {
/* Remember where this bracket ends, unless it is closed by a plain OP_KET,
in which case the marker is cleared. */
1149 end = bracketend(cc);
1150 if (end[-1 - LINK_SIZE] == OP_KET)
1151 end = NULL;
1152 }
1153 cc += bracketlen;
1154 }
1155 }
1156 }
1157
/* Scans the opcodes of the bracket starting at cc (up to, but not including,
   the closing KET found through bracketend()) and counts how many stack words
   a frame for it requires:
     - 2 words the first time OP_SET_SOM is seen,
     - 2 words the first time OP_MARK / OP_PRUNE_ARG is seen,
     - 2 words the first time the "last capture" local must be saved,
     - 3 words for every capturing bracket,
     - OP_RECURSE counts each of the three 2-word items that is enabled in
       "common" and has not been counted yet.
   These are the same items, in the same sizes, that init_frame() writes.

   recursive: when TRUE the SET_SOM and MARK items are treated as already
     counted and are never added here.
   needs_control_head: output flag. It is set to TRUE when a MARK, PRUNE,
     SKIP or COMMIT style verb is found and common->control_head_ptr is
     non-zero (or unconditionally when DEBUG_FORCE_CONTROL_HEAD is enabled).

   Returns length + 1 when a frame is needed; otherwise no_frame (some opcode
   requires the stack to be restored) or no_stack. */
1158 /* Returns with a frame_types (always < 0) if no need for frame. */
1159 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive, BOOL* needs_control_head)
1160 {
1161 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
1162 int length = 0;
1163 int possessive = 0;
1164 BOOL stack_restore = FALSE;
1165 BOOL setsom_found = recursive;
1166 BOOL setmark_found = recursive;
1167 /* The last capture is a local variable even for recursions. */
1168 BOOL capture_last_found = FALSE;
1169
1170 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1171 SLJIT_ASSERT(common->control_head_ptr != 0);
1172 *needs_control_head = TRUE;
1173 #else
1174 *needs_control_head = FALSE;
1175 #endif
1176
/* A non-recursive possessive capturing bracket starts with a fixed length of
   3 words, or 5 when the last-capture local exists. "possessive" remembers
   that starting value so it can be compared with the final length below. */
1177 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1178 {
1179 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1180 /* This is correct regardless of common->capture_last_ptr. */
1181 capture_last_found = TRUE;
1182 }
1183
/* Skip the opening opcode itself, then walk the body. */
1184 cc = next_opcode(common, cc);
1185 SLJIT_ASSERT(cc != NULL);
1186 while (cc < ccend)
1187 switch(*cc)
1188 {
1189 case OP_SET_SOM:
1190 SLJIT_ASSERT(common->has_set_som);
1191 stack_restore = TRUE;
1192 if (!setsom_found)
1193 {
1194 length += 2;
1195 setsom_found = TRUE;
1196 }
1197 cc += 1;
1198 break;
1199
1200 case OP_MARK:
1201 case OP_PRUNE_ARG:
1202 SLJIT_ASSERT(common->mark_ptr != 0);
1203 stack_restore = TRUE;
1204 if (!setmark_found)
1205 {
1206 length += 2;
1207 setmark_found = TRUE;
1208 }
1209 if (common->control_head_ptr != 0)
1210 *needs_control_head = TRUE;
/* Advance by 1 + 2 + cc[1] code units; cc[1] is read as a length here
   (presumably the length of the verb's name argument - confirm against the
   opcode layout). */
1211 cc += 1 + 2 + cc[1];
1212 break;
1213
1214 case OP_RECURSE:
1215 stack_restore = TRUE;
1216 if (common->has_set_som && !setsom_found)
1217 {
1218 length += 2;
1219 setsom_found = TRUE;
1220 }
1221 if (common->mark_ptr != 0 && !setmark_found)
1222 {
1223 length += 2;
1224 setmark_found = TRUE;
1225 }
1226 if (common->capture_last_ptr != 0 && !capture_last_found)
1227 {
1228 length += 2;
1229 capture_last_found = TRUE;
1230 }
1231 cc += 1 + LINK_SIZE;
1232 break;
1233
1234 case OP_CBRA:
1235 case OP_CBRAPOS:
1236 case OP_SCBRA:
1237 case OP_SCBRAPOS:
1238 stack_restore = TRUE;
1239 if (common->capture_last_ptr != 0 && !capture_last_found)
1240 {
1241 length += 2;
1242 capture_last_found = TRUE;
1243 }
1244 length += 3;
1245 cc += 1 + LINK_SIZE + IMM2_SIZE;
1246 break;
1247
1248 case OP_PRUNE:
1249 case OP_SKIP:
1250 case OP_SKIP_ARG:
1251 case OP_COMMIT:
1252 if (common->control_head_ptr != 0)
1253 *needs_control_head = TRUE;
1254 /* Fall through. */
1255
/* The position of "default" is significant: every opcode that is NOT named
   in the list below lands here, sets stack_restore and then falls into the
   shared next_opcode() step. The opcodes listed after it reach that step
   directly, without touching stack_restore. */
1256 default:
1257 stack_restore = TRUE;
1258 /* Fall through. */
1259
1260 case OP_NOT_WORD_BOUNDARY:
1261 case OP_WORD_BOUNDARY:
1262 case OP_NOT_DIGIT:
1263 case OP_DIGIT:
1264 case OP_NOT_WHITESPACE:
1265 case OP_WHITESPACE:
1266 case OP_NOT_WORDCHAR:
1267 case OP_WORDCHAR:
1268 case OP_ANY:
1269 case OP_ALLANY:
1270 case OP_ANYBYTE:
1271 case OP_NOTPROP:
1272 case OP_PROP:
1273 case OP_ANYNL:
1274 case OP_NOT_HSPACE:
1275 case OP_HSPACE:
1276 case OP_NOT_VSPACE:
1277 case OP_VSPACE:
1278 case OP_EXTUNI:
1279 case OP_EODN:
1280 case OP_EOD:
1281 case OP_CIRC:
1282 case OP_CIRCM:
1283 case OP_DOLL:
1284 case OP_DOLLM:
1285 case OP_CHAR:
1286 case OP_CHARI:
1287 case OP_NOT:
1288 case OP_NOTI:
1289
1290 case OP_EXACT:
1291 case OP_POSSTAR:
1292 case OP_POSPLUS:
1293 case OP_POSQUERY:
1294 case OP_POSUPTO:
1295
1296 case OP_EXACTI:
1297 case OP_POSSTARI:
1298 case OP_POSPLUSI:
1299 case OP_POSQUERYI:
1300 case OP_POSUPTOI:
1301
1302 case OP_NOTEXACT:
1303 case OP_NOTPOSSTAR:
1304 case OP_NOTPOSPLUS:
1305 case OP_NOTPOSQUERY:
1306 case OP_NOTPOSUPTO:
1307
1308 case OP_NOTEXACTI:
1309 case OP_NOTPOSSTARI:
1310 case OP_NOTPOSPLUSI:
1311 case OP_NOTPOSQUERYI:
1312 case OP_NOTPOSUPTOI:
1313
1314 case OP_TYPEEXACT:
1315 case OP_TYPEPOSSTAR:
1316 case OP_TYPEPOSPLUS:
1317 case OP_TYPEPOSQUERY:
1318 case OP_TYPEPOSUPTO:
1319
1320 case OP_CLASS:
1321 case OP_NCLASS:
1322 case OP_XCLASS:
1323
1324 cc = next_opcode(common, cc);
1325 SLJIT_ASSERT(cc != NULL);
1326 break;
1327 }
1328
/* If the body added nothing beyond the possessive bracket's own starting
   length, no frame is reported. */
1329 /* Possessive quantifiers can use a special case. */
1330 if (SLJIT_UNLIKELY(possessive == length))
1331 return stack_restore ? no_frame : no_stack;
1332
/* One word is added to the counted items; init_frame() writes a single
   zero word after the items it stores. */
1333 if (length > 0)
1334 return length + 1;
1335 return stack_restore ? no_frame : no_stack;
1336 }
1337
1338 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1339 {
1340 DEFINE_COMPILER;
1341 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
1342 BOOL setsom_found = recursive;
1343 BOOL setmark_found = recursive;
1344 /* The last capture is a local variable even for recursions. */
1345 BOOL capture_last_found = FALSE;
1346 int offset;
1347
1348 /* >= 1 + shortest item size (2) */
1349 SLJIT_UNUSED_ARG(stacktop);
1350 SLJIT_ASSERT(stackpos >= stacktop + 2);
1351
1352 stackpos = STACK(stackpos);
1353 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1354 cc = next_opcode(common, cc);
1355 SLJIT_ASSERT(cc != NULL);
1356 while (cc < ccend)
1357 switch(*cc)
1358 {
1359 case OP_SET_SOM:
1360 SLJIT_ASSERT(common->has_set_som);
1361 if (!setsom_found)
1362 {
1363 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1364 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1365 stackpos += (int)sizeof(sljit_sw);
1366 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1367 stackpos += (int)sizeof(sljit_sw);
1368 setsom_found = TRUE;
1369 }
1370 cc += 1;
1371 break;
1372
1373 case OP_MARK:
1374 case OP_PRUNE_ARG:
1375 SLJIT_ASSERT(common->mark_ptr != 0);
1376 if (!setmark_found)
1377 {
1378 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1379 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1380 stackpos += (int)sizeof(sljit_sw);
1381 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1382 stackpos += (int)sizeof(sljit_sw);
1383 setmark_found = TRUE;
1384 }
1385 cc += 1 + 2 + cc[1];
1386 break;
1387
1388 case OP_RECURSE:
1389 if (common->has_set_som && !setsom_found)
1390 {
1391 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1392 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1393 stackpos += (int)sizeof(sljit_sw);
1394 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1395 stackpos += (int)sizeof(sljit_sw);
1396 setsom_found = TRUE;
1397 }
1398 if (common->mark_ptr != 0 && !setmark_found)
1399 {
1400 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1401 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1402 stackpos += (int)sizeof(sljit_sw);
1403 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1404 stackpos += (int)sizeof(sljit_sw);
1405 setmark_found = TRUE;
1406 }
1407 if (common->capture_last_ptr != 0 && !capture_last_found)
1408 {
1409 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1410 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1411 stackpos += (int)sizeof(sljit_sw);
1412 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1413 stackpos += (int)sizeof(sljit_sw);
1414 capture_last_found = TRUE;
1415 }
1416 cc += 1 + LINK_SIZE;
1417 break;
1418
1419 case OP_CBRA:
1420 case OP_CBRAPOS:
1421 case OP_SCBRA:
1422 case OP_SCBRAPOS:
1423 if (common->capture_last_ptr != 0 && !capture_last_found)
1424 {
1425 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1426 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1427 stackpos += (int)sizeof(sljit_sw);
1428 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1429 stackpos += (int)sizeof(sljit_sw);
1430 capture_last_found = TRUE;
1431 }
1432 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1433 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1434 stackpos += (int)sizeof(sljit_sw);
1435 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1436 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1437 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1438 stackpos += (int)sizeof(sljit_sw);
1439 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1440 stackpos += (int)sizeof(sljit_sw);
1441
1442 cc += 1 + LINK_SIZE + IMM2_SIZE;
1443 break;
1444
1445 default:
1446 cc = next_opcode(common, cc);
1447 SLJIT_ASSERT(cc != NULL);
1448 break;
1449 }
1450
1451 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1452 SLJIT_ASSERT(stackpos == STACK(stacktop));
1453 }
1454
1455 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1456 {
1457 int private_data_length = needs_control_head ? 3 : 2;
1458 int size;
1459 pcre_uchar *alternative;
1460 /* Calculate the sum of the private machine words. */
1461 while (cc < ccend)
1462 {
1463 size = 0;
1464 switch(*cc)
1465 {
1466 case OP_ASSERT:
1467 case OP_ASSERT_NOT:
1468 case OP_ASSERTBACK:
1469 case OP_ASSERTBACK_NOT:
1470 case OP_ONCE:
1471 case OP_ONCE_NC:
1472 case OP_BRAPOS:
1473 case OP_SBRA:
1474 case OP_SBRAPOS:
1475 case OP_SCOND:
1476 private_data_length++;
1477 cc += 1 + LINK_SIZE;
1478 break;
1479
1480 case OP_CBRA:
1481 case OP_SCBRA:
1482 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1483 private_data_length++;
1484 cc += 1 + LINK_SIZE + IMM2_SIZE;
1485 break;
1486
1487 case OP_CBRAPOS:
1488 case OP_SCBRAPOS:
1489 private_data_length += 2;
1490 cc += 1 + LINK_SIZE + IMM2_SIZE;
1491 break;
1492
1493 case OP_COND:
1494 /* Might be a hidden SCOND. */
1495 alternative = cc + GET(cc, 1);
1496 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1497 private_data_length++;
1498 cc += 1 + LINK_SIZE;
1499 break;
1500
1501 CASE_ITERATOR_PRIVATE_DATA_1
1502 if (PRIVATE_DATA(cc))
1503 private_data_length++;
1504 cc += 2;
1505 #ifdef SUPPORT_UTF
1506 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1507 #endif
1508 break;
1509
1510 CASE_ITERATOR_PRIVATE_DATA_2A
1511 if (PRIVATE_DATA(cc))
1512 private_data_length += 2;
1513 cc += 2;
1514 #ifdef SUPPORT_UTF
1515 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1516 #endif
1517 break;
1518
1519 CASE_ITERATOR_PRIVATE_DATA_2B
1520 if (PRIVATE_DATA(cc))
1521 private_data_length += 2;
1522 cc += 2 + IMM2_SIZE;
1523 #ifdef SUPPORT_UTF
1524 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1525 #endif
1526 break;
1527
1528 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1529 if (PRIVATE_DATA(cc))
1530 private_data_length++;
1531 cc += 1;
1532 break;
1533
1534 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1535 if (PRIVATE_DATA(cc))
1536 private_data_length += 2;
1537 cc += 1;
1538 break;
1539
1540 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1541 if (PRIVATE_DATA(cc))
1542 private_data_length += 2;
1543 cc += 1 + IMM2_SIZE;
1544 break;
1545
1546 case OP_CLASS:
1547 case OP_NCLASS:
1548 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1549 case OP_XCLASS:
1550 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1551 #else
1552 size = 1 + 32 / (int)sizeof(pcre_uchar);
1553 #endif
1554 if (PRIVATE_DATA(cc))
1555 private_data_length += get_class_iterator_size(cc + size);
1556 cc += size;
1557 break;
1558
1559 default:
1560 cc = next_opcode(common, cc);
1561 SLJIT_ASSERT(cc != NULL);
1562 break;
1563 }
1564 }
1565 SLJIT_ASSERT(cc == ccend);
1566 return private_data_length;
1567 }
1568
/* Emits the code that copies the private data words of the opcode range
   [cc, ccend) between the locals area (addressed through SLJIT_LOCALS_REG)
   and the stack (addressed through STACK_TOP).

   save == TRUE:  locals -> stack. The copy starts with
                  common->recursive_head_ptr and, when needs_control_head is
                  set, common->control_head_ptr.
   save == FALSE: stack -> locals. The leading one or two words written by
                  the "start" step of a save are skipped, not restored.

   stackptr / stacktop: slot indexes; they are converted to byte offsets
     with STACK(stackptr - 2) and STACK(stacktop - 1) before use.

   The words travel through TMP1 and TMP2 alternately. tmp1next selects the
   register used for the next word, tmp1empty / tmp2empty tell whether the
   register currently holds a word that still has to be written out.
   The set of opcodes handled here mirrors get_private_data_copy_length(). */
1569 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1570 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1571 {
1572 DEFINE_COMPILER;
1573 int srcw[2];
1574 int count, size;
1575 BOOL tmp1next = TRUE;
1576 BOOL tmp1empty = TRUE;
1577 BOOL tmp2empty = TRUE;
1578 pcre_uchar *alternative;
1579 enum {
1580 start,
1581 loop,
1582 end
1583 } status;
1584
1585 status = save ? start : loop;
1586 stackptr = STACK(stackptr - 2);
1587 stacktop = STACK(stacktop - 1);
1588
/* Restore: step over the leading word(s) and preload up to two words from
   the stack, so that both registers are ready when the opcode scan starts. */
1589 if (!save)
1590 {
1591 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1592 if (stackptr < stacktop)
1593 {
1594 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1595 stackptr += sizeof(sljit_sw);
1596 tmp1empty = FALSE;
1597 }
1598 if (stackptr < stacktop)
1599 {
1600 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1601 stackptr += sizeof(sljit_sw);
1602 tmp2empty = FALSE;
1603 }
1604 /* The tmp1next must be TRUE in either way. */
1605 }
1606
/* Each pass of this loop selects 0, 1 or 2 local offsets (srcw[], count);
   the inner "while (count > 0)" loop below then moves them. */
1607 do
1608 {
1609 count = 0;
1610 switch(status)
1611 {
1612 case start:
1613 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1614 count = 1;
1615 srcw[0] = common->recursive_head_ptr;
1616 if (needs_control_head)
1617 {
1618 SLJIT_ASSERT(common->control_head_ptr != 0);
1619 count = 2;
1620 srcw[1] = common->control_head_ptr;
1621 }
1622 status = loop;
1623 break;
1624
1625 case loop:
1626 if (cc >= ccend)
1627 {
1628 status = end;
1629 break;
1630 }
1631
1632 switch(*cc)
1633 {
1634 case OP_ASSERT:
1635 case OP_ASSERT_NOT:
1636 case OP_ASSERTBACK:
1637 case OP_ASSERTBACK_NOT:
1638 case OP_ONCE:
1639 case OP_ONCE_NC:
1640 case OP_BRAPOS:
1641 case OP_SBRA:
1642 case OP_SBRAPOS:
1643 case OP_SCOND:
1644 count = 1;
1645 srcw[0] = PRIVATE_DATA(cc);
1646 SLJIT_ASSERT(srcw[0] != 0);
1647 cc += 1 + LINK_SIZE;
1648 break;
1649
1650 case OP_CBRA:
1651 case OP_SCBRA:
1652 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1653 {
1654 count = 1;
1655 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1656 }
1657 cc += 1 + LINK_SIZE + IMM2_SIZE;
1658 break;
1659
1660 case OP_CBRAPOS:
1661 case OP_SCBRAPOS:
1662 count = 2;
1663 srcw[0] = PRIVATE_DATA(cc);
1664 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1665 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1666 cc += 1 + LINK_SIZE + IMM2_SIZE;
1667 break;
1668
1669 case OP_COND:
1670 /* Might be a hidden SCOND. */
1671 alternative = cc + GET(cc, 1);
1672 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1673 {
1674 count = 1;
1675 srcw[0] = PRIVATE_DATA(cc);
1676 SLJIT_ASSERT(srcw[0] != 0);
1677 }
1678 cc += 1 + LINK_SIZE;
1679 break;
1680
1681 CASE_ITERATOR_PRIVATE_DATA_1
1682 if (PRIVATE_DATA(cc))
1683 {
1684 count = 1;
1685 srcw[0] = PRIVATE_DATA(cc);
1686 }
1687 cc += 2;
1688 #ifdef SUPPORT_UTF
1689 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1690 #endif
1691 break;
1692
1693 CASE_ITERATOR_PRIVATE_DATA_2A
1694 if (PRIVATE_DATA(cc))
1695 {
1696 count = 2;
1697 srcw[0] = PRIVATE_DATA(cc);
1698 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1699 }
1700 cc += 2;
1701 #ifdef SUPPORT_UTF
1702 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1703 #endif
1704 break;
1705
1706 CASE_ITERATOR_PRIVATE_DATA_2B
1707 if (PRIVATE_DATA(cc))
1708 {
1709 count = 2;
1710 srcw[0] = PRIVATE_DATA(cc);
1711 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1712 }
1713 cc += 2 + IMM2_SIZE;
1714 #ifdef SUPPORT_UTF
1715 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1716 #endif
1717 break;
1718
1719 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1720 if (PRIVATE_DATA(cc))
1721 {
1722 count = 1;
1723 srcw[0] = PRIVATE_DATA(cc);
1724 }
1725 cc += 1;
1726 break;
1727
1728 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1729 if (PRIVATE_DATA(cc))
1730 {
1731 count = 2;
1732 srcw[0] = PRIVATE_DATA(cc);
1733 srcw[1] = srcw[0] + sizeof(sljit_sw);
1734 }
1735 cc += 1;
1736 break;
1737
1738 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1739 if (PRIVATE_DATA(cc))
1740 {
1741 count = 2;
1742 srcw[0] = PRIVATE_DATA(cc);
1743 srcw[1] = srcw[0] + sizeof(sljit_sw);
1744 }
1745 cc += 1 + IMM2_SIZE;
1746 break;
1747
1748 case OP_CLASS:
1749 case OP_NCLASS:
1750 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1751 case OP_XCLASS:
1752 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1753 #else
1754 size = 1 + 32 / (int)sizeof(pcre_uchar);
1755 #endif
1756 if (PRIVATE_DATA(cc))
1757 switch(get_class_iterator_size(cc + size))
1758 {
1759 case 1:
1760 count = 1;
1761 srcw[0] = PRIVATE_DATA(cc);
1762 break;
1763
1764 case 2:
1765 count = 2;
1766 srcw[0] = PRIVATE_DATA(cc);
1767 srcw[1] = srcw[0] + sizeof(sljit_sw);
1768 break;
1769
1770 default:
1771 SLJIT_ASSERT_STOP();
1772 break;
1773 }
1774 cc += size;
1775 break;
1776
1777 default:
1778 cc = next_opcode(common, cc);
1779 SLJIT_ASSERT(cc != NULL);
1780 break;
1781 }
1782 break;
1783
1784 case end:
1785 SLJIT_ASSERT_STOP();
1786 break;
1787 }
1788
/* Move the selected words. count is decremented before it indexes srcw[],
   so srcw[1] (when present) is processed before srcw[0]. */
1789 while (count > 0)
1790 {
1791 count--;
1792 if (save)
1793 {
/* Save: write out the word the chosen register still holds (if any), then
   reload that register from the next local. */
1794 if (tmp1next)
1795 {
1796 if (!tmp1empty)
1797 {
1798 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1799 stackptr += sizeof(sljit_sw);
1800 }
1801 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1802 tmp1empty = FALSE;
1803 tmp1next = FALSE;
1804 }
1805 else
1806 {
1807 if (!tmp2empty)
1808 {
1809 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1810 stackptr += sizeof(sljit_sw);
1811 }
1812 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1813 tmp2empty = FALSE;
1814 tmp1next = TRUE;
1815 }
1816 }
1817 else
1818 {
/* Restore: store the preloaded register into the local, then refill the
   register from the stack unless the end of the area has been reached. */
1819 if (tmp1next)
1820 {
1821 SLJIT_ASSERT(!tmp1empty);
1822 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1823 tmp1empty = stackptr >= stacktop;
1824 if (!tmp1empty)
1825 {
1826 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1827 stackptr += sizeof(sljit_sw);
1828 }
1829 tmp1next = FALSE;
1830 }
1831 else
1832 {
1833 SLJIT_ASSERT(!tmp2empty);
1834 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1835 tmp2empty = stackptr >= stacktop;
1836 if (!tmp2empty)
1837 {
1838 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1839 stackptr += sizeof(sljit_sw);
1840 }
1841 tmp1next = TRUE;
1842 }
1843 }
1844 }
1845 }
1846 while (status != end);
1847
/* Save: flush the words still held in the registers. The register that
   would have been used next is written first, which keeps the stack order
   identical to the order in which the words were loaded. */
1848 if (save)
1849 {
1850 if (tmp1next)
1851 {
1852 if (!tmp1empty)
1853 {
1854 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1855 stackptr += sizeof(sljit_sw);
1856 }
1857 if (!tmp2empty)
1858 {
1859 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1860 stackptr += sizeof(sljit_sw);
1861 }
1862 }
1863 else
1864 {
1865 if (!tmp2empty)
1866 {
1867 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1868 stackptr += sizeof(sljit_sw);
1869 }
1870 if (!tmp1empty)
1871 {
1872 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1873 stackptr += sizeof(sljit_sw);
1874 }
1875 }
1876 }
1877 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1878 }
1879
1880 #undef CASE_ITERATOR_PRIVATE_DATA_1
1881 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1882 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1883 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1884 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1885 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1886
1887 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1888 {
1889 return (value & (value - 1)) == 0;
1890 }
1891
1892 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1893 {
1894 while (list)
1895 {
1896 /* sljit_set_label is clever enough to do nothing
1897 if either the jump or the label is NULL. */
1898 SET_LABEL(list->jump, label);
1899 list = list->next;
1900 }
1901 }
1902
1903 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1904 {
1905 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1906 if (list_item)
1907 {
1908 list_item->next = *list;
1909 list_item->jump = jump;
1910 *list = list_item;
1911 }
1912 }
1913
1914 static void add_stub(compiler_common *common, struct sljit_jump *start)
1915 {
1916 DEFINE_COMPILER;
1917 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1918
1919 if (list_item)
1920 {
1921 list_item->start = start;
1922 list_item->quit = LABEL();
1923 list_item->next = common->stubs;
1924 common->stubs = list_item;
1925 }
1926 }
1927
1928 static void flush_stubs(compiler_common *common)
1929 {
1930 DEFINE_COMPILER;
1931 stub_list* list_item = common->stubs;
1932
1933 while (list_item)
1934 {
1935 JUMPHERE(list_item->start);
1936 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1937 JUMPTO(SLJIT_JUMP, list_item->quit);
1938 list_item = list_item->next;
1939 }
1940 common->stubs = NULL;
1941 }
1942
1943 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1944 {
1945 DEFINE_COMPILER;
1946
1947 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1948 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1949 }
1950
1951 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1952 {
1953 /* May destroy all locals and registers except TMP2. */
1954 DEFINE_COMPILER;
1955
1956 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1957 #ifdef DESTROY_REGISTERS
1958 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1959 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1960 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1961 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1962 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1963 #endif
1964 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1965 }
1966
1967 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1968 {
1969 DEFINE_COMPILER;
1970 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1971 }
1972
1973 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1974 {
1975 DEFINE_COMPILER;
1976 struct sljit_label *loop;
1977 int i;
1978
1979 /* At this point we can freely use all temporary registers. */
1980 SLJIT_ASSERT(length > 1);
1981 /* TMP1 returns with begin - 1. */
1982 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1983 if (length < 8)
1984 {
1985 for (i = 1; i < length; i++)
1986 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
1987 }
1988 else
1989 {
1990 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
1991 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
1992 loop = LABEL();
1993 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
1994 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
1995 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1996 }
1997 }
1998
1999 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2000 {
2001 DEFINE_COMPILER;
2002 struct sljit_label *loop;
2003 int i;
2004
2005 SLJIT_ASSERT(length > 1);
2006 /* OVECTOR(1) contains the "string begin - 1" constant. */
2007 if (length > 2)
2008 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2009 if (length < 8)
2010 {
2011 for (i = 2; i < length; i++)
2012 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2013 }
2014 else
2015 {
2016 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2017 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2018 loop = LABEL();
2019 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2020 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2021 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2022 }
2023
2024 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2025 if (common->mark_ptr != 0)
2026 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2027 SLJIT_ASSERT(common->control_head_ptr != 0);
2028 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2029 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2030 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2031 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2032 }
2033
2034 static sljit_sw SLJIT_CALL do_check_control_chain(sljit_sw *current)
2035 {
2036 sljit_sw return_value = 0;
2037 const pcre_uchar *skip_arg = NULL;
2038
2039 SLJIT_ASSERT(current != NULL);
2040 do
2041 {
2042 switch (current[-2])
2043 {
2044 case type_commit:
2045 /* Commit overwrites all. */
2046 return -1;
2047
2048 case type_prune:
2049 break;
2050
2051 case type_skip:
2052 /* Overwrites prune, but not other skips. */
2053 if (return_value == 0 && skip_arg == NULL)
2054 return_value = current[-3];
2055 break;
2056
2057 case type_skip_arg:
2058 if (return_value == 0 && skip_arg == NULL)
2059 skip_arg = (pcre_uchar *)current[-3];
2060 break;
2061
2062 case type_mark:
2063 if (return_value == 0 && skip_arg != NULL)
2064 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2065 return_value = current[-4];
2066 break;
2067
2068 default:
2069 SLJIT_ASSERT_STOP();
2070 break;
2071 }
2072 current = (sljit_sw*)current[-1];
2073 }
2074 while (current != NULL);
2075 return (return_value != 0 || skip_arg == NULL) ? return_value : -2;
2076 }
2077
2078 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2079 {
2080 DEFINE_COMPILER;
2081 struct sljit_label *loop;
2082 struct sljit_jump *early_quit;
2083
2084 /* At this point we can freely use all registers. */
2085 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2086 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2087
2088 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2089 if (common->mark_ptr != 0)
2090 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2091 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2092 if (common->mark_ptr != 0)
2093 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
2094 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2095 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2096 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2097 /* Unlikely, but possible */
2098 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
2099 loop = LABEL();
2100 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2101 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2102 /* Copy the integer value to the output buffer */
2103 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2104 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2105 #endif
2106 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2107 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2108 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2109 JUMPHERE(early_quit);
2110
2111 /* Calculate the return value, which is the maximum ovector value. */
2112 if (topbracket > 1)
2113 {
2114 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2115 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2116
2117 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2118 loop = LABEL();
2119 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2120 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2121 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2122 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2123 }
2124 else
2125 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2126 }
2127
2128 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2129 {
2130 DEFINE_COMPILER;
2131 struct sljit_jump *jump;
2132
2133 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2134 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2135 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2136
2137 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2138 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2139 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2140 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2141
2142 /* Store match begin and end. */
2143 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2144 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2145
2146 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2147 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2148 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2149 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2150 #endif
2151 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2152 JUMPHERE(jump);
2153
2154 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2155 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2156 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2157 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2158 #endif
2159 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2160
2161 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2162 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2163 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2164 #endif
2165 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2166
2167 JUMPTO(SLJIT_JUMP, quit);
2168 }
2169
2170 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2171 {
2172 /* May destroy TMP1. */
2173 DEFINE_COMPILER;
2174 struct sljit_jump *jump;
2175
2176 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2177 {
2178 /* The value of -1 must be kept for start_used_ptr! */
2179 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2180 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2181 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2182 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2183 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2184 JUMPHERE(jump);
2185 }
2186 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2187 {
2188 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2189 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2190 JUMPHERE(jump);
2191 }
2192 }
2193
2194 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2195 {
2196 /* Detects if the character has an othercase. */
2197 unsigned int c;
2198
2199 #ifdef SUPPORT_UTF
2200 if (common->utf)
2201 {
2202 GETCHAR(c, cc);
2203 if (c > 127)
2204 {
2205 #ifdef SUPPORT_UCP
2206 return c != UCD_OTHERCASE(c);
2207 #else
2208 return FALSE;
2209 #endif
2210 }
2211 #ifndef COMPILE_PCRE8
2212 return common->fcc[c] != c;
2213 #endif
2214 }
2215 else
2216 #endif
2217 c = *cc;
2218 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2219 }
2220
2221 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2222 {
2223 /* Returns with the othercase. */
2224 #ifdef SUPPORT_UTF
2225 if (common->utf && c > 127)
2226 {
2227 #ifdef SUPPORT_UCP
2228 return UCD_OTHERCASE(c);
2229 #else
2230 return c;
2231 #endif
2232 }
2233 #endif
2234 return TABLE_GET(c, common->fcc, c);
2235 }
2236
2237 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2238 {
2239 /* Detects if the character and its othercase has only 1 bit difference. */
2240 unsigned int c, oc, bit;
2241 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2242 int n;
2243 #endif
2244
2245 #ifdef SUPPORT_UTF
2246 if (common->utf)
2247 {
2248 GETCHAR(c, cc);
2249 if (c <= 127)
2250 oc = common->fcc[c];
2251 else
2252 {
2253 #ifdef SUPPORT_UCP
2254 oc = UCD_OTHERCASE(c);
2255 #else
2256 oc = c;
2257 #endif
2258 }
2259 }
2260 else
2261 {
2262 c = *cc;
2263 oc = TABLE_GET(c, common->fcc, c);
2264 }
2265 #else
2266 c = *cc;
2267 oc = TABLE_GET(c, common->fcc, c);
2268 #endif
2269
2270 SLJIT_ASSERT(c != oc);
2271
2272 bit = c ^ oc;
2273 /* Optimized for English alphabet. */
2274 if (c <= 127 && bit == 0x20)
2275 return (0 << 8) | 0x20;
2276
2277 /* Since c != oc, they must have at least 1 bit difference. */
2278 if (!is_powerof2(bit))
2279 return 0;
2280
2281 #if defined COMPILE_PCRE8
2282
2283 #ifdef SUPPORT_UTF
2284 if (common->utf && c > 127)
2285 {
2286 n = GET_EXTRALEN(*cc);
2287 while ((bit & 0x3f) == 0)
2288 {
2289 n--;
2290 bit >>= 6;
2291 }
2292 return (n << 8) | bit;
2293 }
2294 #endif /* SUPPORT_UTF */
2295 return (0 << 8) | bit;
2296
2297 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2298
2299 #ifdef SUPPORT_UTF
2300 if (common->utf && c > 65535)
2301 {
2302 if (bit >= (1 << 10))
2303 bit >>= 10;
2304 else
2305 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2306 }
2307 #endif /* SUPPORT_UTF */
2308 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2309
2310 #endif /* COMPILE_PCRE[8|16|32] */
2311 }
2312
2313 static void check_partial(compiler_common *common, BOOL force)
2314 {
2315 /* Checks whether a partial matching is occured. Does not modify registers. */
2316 DEFINE_COMPILER;
2317 struct sljit_jump *jump = NULL;
2318
2319 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2320
2321 if (common->mode == JIT_COMPILE)
2322 return;
2323
2324 if (!force)
2325 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2326 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2327 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2328
2329 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2330 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2331 else
2332 {
2333 if (common->partialmatchlabel != NULL)
2334 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2335 else
2336 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2337 }
2338
2339 if (jump != NULL)
2340 JUMPHERE(jump);
2341 }
2342
2343 static void check_str_end(compiler_common *common, jump_list **end_reached)
2344 {
2345 /* Does not affect registers. Usually used in a tight spot. */
2346 DEFINE_COMPILER;
2347 struct sljit_jump *jump;
2348
2349 if (common->mode == JIT_COMPILE)
2350 {
2351 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2352 return;
2353 }
2354
2355 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2356 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2357 {
2358 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2359 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2360 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2361 }
2362 else
2363 {
2364 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2365 if (common->partialmatchlabel != NULL)
2366 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2367 else
2368 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2369 }
2370 JUMPHERE(jump);
2371 }
2372
2373 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2374 {
2375 DEFINE_COMPILER;
2376 struct sljit_jump *jump;
2377
2378 if (common->mode == JIT_COMPILE)
2379 {
2380 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2381 return;
2382 }
2383
2384 /* Partial matching mode. */
2385 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2386 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2387 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2388 {
2389 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2390 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2391 }
2392 else
2393 {
2394 if (common->partialmatchlabel != NULL)
2395 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2396 else
2397 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2398 }
2399 JUMPHERE(jump);
2400 }
2401
2402 static void read_char(compiler_common *common)
2403 {
2404 /* Reads the character into TMP1, updates STR_PTR.
2405 Does not check STR_END. TMP2 Destroyed. */
2406 DEFINE_COMPILER;
2407 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2408 struct sljit_jump *jump;
2409 #endif
2410
2411 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2412 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2413 if (common->utf)
2414 {
2415 #if defined COMPILE_PCRE8
2416 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2417 #elif defined COMPILE_PCRE16
2418 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2419 #endif /* COMPILE_PCRE[8|16] */
2420 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2421 JUMPHERE(jump);
2422 }
2423 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2424 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2425 }
2426
2427 static void peek_char(compiler_common *common)
2428 {
2429 /* Reads the character into TMP1, keeps STR_PTR.
2430 Does not check STR_END. TMP2 Destroyed. */
2431 DEFINE_COMPILER;
2432 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2433 struct sljit_jump *jump;
2434 #endif
2435
2436 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2437 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2438 if (common->utf)
2439 {
2440 #if defined COMPILE_PCRE8
2441 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2442 #elif defined COMPILE_PCRE16
2443 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2444 #endif /* COMPILE_PCRE[8|16] */
2445 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2446 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2447 JUMPHERE(jump);
2448 }
2449 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2450 }
2451
2452 static void read_char8_type(compiler_common *common)
2453 {
2454 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2455 DEFINE_COMPILER;
2456 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2457 struct sljit_jump *jump;
2458 #endif
2459
2460 #ifdef SUPPORT_UTF
2461 if (common->utf)
2462 {
2463 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2464 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2465 #if defined COMPILE_PCRE8
2466 /* This can be an extra read in some situations, but hopefully
2467 it is needed in most cases. */
2468 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2469 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2470 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2471 JUMPHERE(jump);
2472 #elif defined COMPILE_PCRE16
2473 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2474 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2475 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2476 JUMPHERE(jump);
2477 /* Skip low surrogate if necessary. */
2478 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2479 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2480 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2481 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2482 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2483 #elif defined COMPILE_PCRE32
2484 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2485 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2486 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2487 JUMPHERE(jump);
2488 #endif /* COMPILE_PCRE[8|16|32] */
2489 return;
2490 }
2491 #endif /* SUPPORT_UTF */
2492 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2493 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2494 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2495 /* The ctypes array contains only 256 values. */
2496 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2497 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2498 #endif
2499 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2500 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2501 JUMPHERE(jump);
2502 #endif
2503 }
2504
2505 static void skip_char_back(compiler_common *common)
2506 {
2507 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2508 DEFINE_COMPILER;
2509 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2510 #if defined COMPILE_PCRE8
2511 struct sljit_label *label;
2512
2513 if (common->utf)
2514 {
2515 label = LABEL();
2516 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2517 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2518 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2519 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2520 return;
2521 }
2522 #elif defined COMPILE_PCRE16
2523 if (common->utf)
2524 {
2525 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2526 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2527 /* Skip low surrogate if necessary. */
2528 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2529 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2530 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2531 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2532 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2533 return;
2534 }
2535 #endif /* COMPILE_PCRE[8|16] */
2536 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2537 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2538 }
2539
2540 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2541 {
2542 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2543 DEFINE_COMPILER;
2544
2545 if (nltype == NLTYPE_ANY)
2546 {
2547 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2548 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2549 }
2550 else if (nltype == NLTYPE_ANYCRLF)
2551 {
2552 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2553 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2554 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2555 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2556 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2557 }
2558 else
2559 {
2560 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2561 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2562 }
2563 }
2564
2565 #ifdef SUPPORT_UTF
2566
2567 #if defined COMPILE_PCRE8
2568 static void do_utfreadchar(compiler_common *common)
2569 {
2570 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2571 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2572 DEFINE_COMPILER;
2573 struct sljit_jump *jump;
2574
2575 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2576 /* Searching for the first zero. */
2577 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2578 jump = JUMP(SLJIT_C_NOT_ZERO);
2579 /* Two byte sequence. */
2580 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2581 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2582 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2583 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2584 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2585 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2586 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2587 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2588 JUMPHERE(jump);
2589
2590 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2591 jump = JUMP(SLJIT_C_NOT_ZERO);
2592 /* Three byte sequence. */
2593 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2594 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2595 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2596 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2597 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2598 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2599 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2600 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2601 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2602 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2603 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2604 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2605 JUMPHERE(jump);
2606
2607 /* Four byte sequence. */
2608 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2609 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2610 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2611 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2612 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2613 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2614 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2615 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2616 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2617 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2618 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2619 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2620 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2621 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2622 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2623 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2624 }
2625
2626 static void do_utfreadtype8(compiler_common *common)
2627 {
2628 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2629 of the character (>= 0xc0). Return value in TMP1. */
2630 DEFINE_COMPILER;
2631 struct sljit_jump *jump;
2632 struct sljit_jump *compare;
2633
2634 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2635
2636 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2637 jump = JUMP(SLJIT_C_NOT_ZERO);
2638 /* Two byte sequence. */
2639 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2640 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2641 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2642 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2643 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2644 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2645 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2646 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2647 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2648
2649 JUMPHERE(compare);
2650 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2651 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2652 JUMPHERE(jump);
2653
2654 /* We only have types for characters less than 256. */
2655 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2656 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2657 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2658 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2659 }
2660
2661 #elif defined COMPILE_PCRE16
2662
2663 static void do_utfreadchar(compiler_common *common)
2664 {
2665 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2666 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2667 DEFINE_COMPILER;
2668 struct sljit_jump *jump;
2669
2670 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2671 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2672 /* Do nothing, only return. */
2673 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2674
2675 JUMPHERE(jump);
2676 /* Combine two 16 bit characters. */
2677 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2678 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2679 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2680 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2681 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2682 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2683 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2684 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2685 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2686 }
2687
2688 #endif /* COMPILE_PCRE[8|16] */
2689
2690 #endif /* SUPPORT_UTF */
2691
2692 #ifdef SUPPORT_UCP
2693
2694 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2695 #define UCD_BLOCK_MASK 127
2696 #define UCD_BLOCK_SHIFT 7
2697
2698 static void do_getucd(compiler_common *common)
2699 {
2700 /* Search the UCD record for the character comes in TMP1.
2701 Returns chartype in TMP1 and UCD offset in TMP2. */
2702 DEFINE_COMPILER;
2703
2704 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2705
2706 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2707 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2708 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2709 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2710 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2711 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2712 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2713 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2714 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2715 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2716 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2717 }
2718 #endif
2719
2720 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2721 {
2722 DEFINE_COMPILER;
2723 struct sljit_label *mainloop;
2724 struct sljit_label *newlinelabel = NULL;
2725 struct sljit_jump *start;
2726 struct sljit_jump *end = NULL;
2727 struct sljit_jump *nl = NULL;
2728 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2729 struct sljit_jump *singlechar;
2730 #endif
2731 jump_list *newline = NULL;
2732 BOOL newlinecheck = FALSE;
2733 BOOL readuchar = FALSE;
2734
2735 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2736 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2737 newlinecheck = TRUE;
2738
2739 if (firstline)
2740 {
2741 /* Search for the end of the first line. */
2742 SLJIT_ASSERT(common->first_line_end != 0);
2743 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2744
2745 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2746 {
2747 mainloop = LABEL();
2748 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2749 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2750 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2751 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2752 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2753 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2754 JUMPHERE(end);
2755 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2756 }
2757 else
2758 {
2759 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2760 mainloop = LABEL();
2761 /* Continual stores does not cause data dependency. */
2762 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2763 read_char(common);
2764 check_newlinechar(common, common->nltype, &newline, TRUE);
2765 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2766 JUMPHERE(end);
2767 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2768 set_jumps(newline, LABEL());
2769 }
2770
2771 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2772 }
2773
2774 start = JUMP(SLJIT_JUMP);
2775
2776 if (newlinecheck)
2777 {
2778 newlinelabel = LABEL();
2779 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2780 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2781 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2782 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2783 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2784 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2785 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2786 #endif
2787 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2788 nl = JUMP(SLJIT_JUMP);
2789 }
2790
2791 mainloop = LABEL();
2792
2793 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2794 #ifdef SUPPORT_UTF
2795 if (common->utf) readuchar = TRUE;
2796 #endif
2797 if (newlinecheck) readuchar = TRUE;
2798
2799 if (readuchar)
2800 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2801
2802 if (newlinecheck)
2803 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2804
2805 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2806 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2807 #if defined COMPILE_PCRE8
2808 if (common->utf)
2809 {
2810 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2811 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2812 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2813 JUMPHERE(singlechar);
2814 }
2815 #elif defined COMPILE_PCRE16
2816 if (common->utf)
2817 {
2818 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2819 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2820 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2821 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2822 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2823 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2824 JUMPHERE(singlechar);
2825 }
2826 #endif /* COMPILE_PCRE[8|16] */
2827 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2828 JUMPHERE(start);
2829
2830 if (newlinecheck)
2831 {
2832 JUMPHERE(end);
2833 JUMPHERE(nl);
2834 }
2835
2836 return mainloop;
2837 }
2838
2839 #define MAX_N_CHARS 3
2840
2841 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2842 {
2843 DEFINE_COMPILER;
2844 struct sljit_label *start;
2845 struct sljit_jump *quit;
2846 pcre_uint32 chars[MAX_N_CHARS * 2];
2847 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2848 int location = 0;
2849 pcre_int32 len, c, bit, caseless;
2850 int must_stop;
2851
2852 /* We do not support alternatives now. */
2853 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2854 return FALSE;
2855
2856 while (TRUE)
2857 {
2858 caseless = 0;
2859 must_stop = 1;
2860 switch(*cc)
2861 {
2862 case OP_CHAR:
2863 must_stop = 0;
2864 cc++;
2865 break;
2866
2867 case OP_CHARI:
2868 caseless = 1;
2869 must_stop = 0;
2870 cc++;
2871 break;
2872
2873 case OP_SOD:
2874 case OP_SOM:
2875 case OP_SET_SOM:
2876 case OP_NOT_WORD_BOUNDARY:
2877 case OP_WORD_BOUNDARY:
2878 case OP_EODN:
2879 case OP_EOD:
2880 case OP_CIRC:
2881 case OP_CIRCM:
2882 case OP_DOLL:
2883 case OP_DOLLM:
2884 /* Zero width assertions. */
2885 cc++;
2886 continue;
2887
2888 case OP_PLUS:
2889 case OP_MINPLUS:
2890 case OP_POSPLUS:
2891 cc++;
2892 break;
2893
2894 case OP_EXACT:
2895 cc += 1 + IMM2_SIZE;
2896 break;
2897
2898 case OP_PLUSI:
2899 case OP_MINPLUSI:
2900 case OP_POSPLUSI:
2901 caseless = 1;
2902 cc++;
2903 break;
2904
2905 case OP_EXACTI:
2906 caseless = 1;
2907 cc += 1 + IMM2_SIZE;
2908 break;
2909
2910 default:
2911 must_stop = 2;
2912 break;
2913 }
2914
2915 if (must_stop == 2)
2916 break;
2917
2918 len = 1;
2919 #ifdef SUPPORT_UTF
2920 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2921 #endif
2922
2923 if (caseless && char_has_othercase(common, cc))
2924 {
2925 caseless = char_get_othercase_bit(common, cc);
2926 if (caseless == 0)
2927 return FALSE;
2928 #ifdef COMPILE_PCRE8
2929 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2930 #else
2931 if ((caseless & 0x100) != 0)
2932 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2933 else
2934 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2935 #endif
2936 }
2937 else
2938 caseless = 0;
2939
2940 while (len > 0 && location < MAX_N_CHARS * 2)
2941 {
2942 c = *cc;
2943 bit = 0;
2944 if (len == (caseless & 0xff))
2945 {
2946 bit = caseless >> 8;
2947 c |= bit;
2948 }
2949
2950 chars[location] = c;
2951 chars[location + 1] = bit;
2952
2953 len--;
2954 location += 2;
2955 cc++;
2956 }
2957
2958 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
2959 break;
2960 }
2961
2962 /* At least two characters are required. */
2963 if (location < 2 * 2)
2964 return FALSE;
2965
2966 if (firstline)
2967 {
2968 SLJIT_ASSERT(common->first_line_end != 0);
2969 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2970 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2971 }
2972 else
2973 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2974
2975 start = LABEL();
2976 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2977
2978 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2979 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2980 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2981 if (chars[1] != 0)
2982 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
2983 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
2984 if (location > 2 * 2)
2985 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2986 if (chars[3] != 0)
2987 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
2988 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
2989 if (location > 2 * 2)
2990 {
2991 if (chars[5] != 0)
2992 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
2993 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
2994 }
2995 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2996
2997 JUMPHERE(quit);
2998
2999 if (firstline)
3000 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3001 else
3002 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3003 return TRUE;
3004 }
3005
3006 #undef MAX_N_CHARS
3007
3008 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3009 {
3010 DEFINE_COMPILER;
3011 struct sljit_label *start;
3012 struct sljit_jump *quit;
3013 struct sljit_jump *found;
3014 pcre_uchar oc, bit;
3015
3016 if (firstline)
3017 {
3018 SLJIT_ASSERT(common->first_line_end != 0);
3019 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3020 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3021 }
3022
3023 start = LABEL();
3024 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3025 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3026
3027 oc = first_char;
3028 if (caseless)
3029 {
3030 oc = TABLE_GET(first_char, common->fcc, first_char);
3031 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3032 if (first_char > 127 && common->utf)
3033 oc = UCD_OTHERCASE(first_char);
3034 #endif
3035 }
3036 if (first_char == oc)
3037 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3038 else
3039 {
3040 bit = first_char ^ oc;
3041 if (is_powerof2(bit))
3042 {
3043 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3044 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3045 }
3046 else
3047 {
3048 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3049 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3050 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3051 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3052 found = JUMP(SLJIT_C_NOT_ZERO);
3053 }
3054 }
3055
3056 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3057 JUMPTO(SLJIT_JUMP, start);
3058 JUMPHERE(found);
3059 JUMPHERE(quit);
3060
3061 if (firstline)
3062 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3063 }
3064
3065 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3066 {
3067 DEFINE_COMPILER;
3068 struct sljit_label *loop;
3069 struct sljit_jump *lastchar;
3070 struct sljit_jump *firstchar;
3071 struct sljit_jump *quit;
3072 struct sljit_jump *foundcr = NULL;
3073 struct sljit_jump *notfoundnl;
3074 jump_list *newline = NULL;
3075
3076 if (firstline)
3077 {
3078 SLJIT_ASSERT(common->first_line_end != 0);
3079 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3080 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3081 }
3082
3083 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3084 {
3085 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3086 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3087 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3088 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3089 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3090
3091 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3092 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3093 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3094 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3095 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3096 #endif
3097 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3098
3099 loop = LABEL();
3100 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3101 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3102 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3103 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3104 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3105 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3106
3107 JUMPHERE(quit);
3108 JUMPHERE(firstchar);
3109 JUMPHERE(lastchar);
3110
3111 if (firstline)
3112 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3113 return;
3114 }
3115
3116 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3117 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3118 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3119 skip_char_back(common);
3120
3121 loop = LABEL();
3122 read_char(common);
3123 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3124 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3125 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3126 check_newlinechar(common, common->nltype, &newline, FALSE);
3127 set_jumps(newline, loop);
3128
3129 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3130 {
3131 quit = JUMP(SLJIT_JUMP);
3132 JUMPHERE(foundcr);
3133 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3134 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3135 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3136 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3137 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3138 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3139 #endif
3140 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3141 JUMPHERE(notfoundnl);
3142 JUMPHERE(quit);
3143 }
3144 JUMPHERE(lastchar);
3145 JUMPHERE(firstchar);
3146
3147 if (firstline)
3148 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3149 }
3150
3151 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
3152
/* Emits a scanning loop that moves STR_PTR forward until the code unit it
   points at is accepted by the start map, or until STR_PTR reaches STR_END.

   common     - compiler state (the utf flag and the first_line_end slot are
                read here)
   start_bits - address of a 32 byte bitmap; it is read at compile time to
                build an inverted copy and is also used as the displacement
                of the table load in the generated code
   firstline  - when set, STR_END is replaced by the value stored in the
                local slot common->first_line_end for the duration of the
                scan and restored afterwards */
3153 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
3154 {
3155 DEFINE_COMPILER;
3156 struct sljit_label *start;
3157 struct sljit_jump *quit;
3158 struct sljit_jump *found = NULL;
3159 jump_list *matches = NULL;
3160 pcre_uint8 inverted_start_bits[32];
3161 int i;
3162 #ifndef COMPILE_PCRE8
3163 struct sljit_jump *jump;
3164 #endif
3165
/* check_class_ranges receives the complement of the map. The jumps it
   collects in "matches" are bound to the exit label at the bottom, i.e. they
   are taken for characters that are NOT in the inverted set. */
3166 for (i = 0; i < 32; ++i)
3167 inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3168
3169 if (firstline)
3170 {
3171 SLJIT_ASSERT(common->first_line_end != 0);
/* Park the real STR_END in RETURN_ADDR while the first line limit is used. */
3172 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3173 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3174 }
3175
3176 start = LABEL();
3177 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3178 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* In UTF mode keep a copy of the code unit in TMP3: TMP1 is overwritten by
   the tests below and is needed again to compute the skip length. */
3179 #ifdef SUPPORT_UTF
3180 if (common->utf)
3181 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3182 #endif
3183
3184 if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3185 {
/* No compact range test could be emitted: test the bit in the map. */
3186 #ifndef COMPILE_PCRE8
/* Wider code units: every value of 255 or more is looked up as 255. */
3187 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3188 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3189 JUMPHERE(jump);
3190 #endif
/* TMP2 = 1 << (ch & 7), TMP1 = byte at start_bits + (ch >> 3); the loop is
   left through "found" when the AND of the two is not zero. */
3191 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3192 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3193 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3194 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3195 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3196 found = JUMP(SLJIT_C_NOT_ZERO);
3197 }
3198
/* Not a start character: restore the code unit (UTF only) and step over it. */
3199 #ifdef SUPPORT_UTF
3200 if (common->utf)
3201 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3202 #endif
3203 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3204 #ifdef SUPPORT_UTF
3205 #if defined COMPILE_PCRE8
3206 if (common->utf)
3207 {
/* Bytes below 0xc0 need no extra skip. Otherwise the value loaded from
   utf8_table4[ch - 0xc0] is added to STR_PTR as well. */
3208 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3209 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3210 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3211 }
3212 #elif defined COMPILE_PCRE16
3213 if (common->utf)
3214 {
/* Units below 0xd800 need no extra skip. Otherwise the result of the test
   (ch & 0xfc00) == 0xd800 is materialised in TMP1 and shifted left by one,
   so STR_PTR gains 2 more bytes when the test holds (assuming OP_FLAGS
   yields 0 or 1 - TODO confirm against the sljit documentation). */
3215 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3216 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3217 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3218 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3219 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3220 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3221 }
3222 #endif /* COMPILE_PCRE[8|16] */
3223 #endif /* SUPPORT_UTF */
3224 JUMPTO(SLJIT_JUMP, start);
/* Common exit: reached from the bitmap test, from the range test jumps and
   when STR_PTR has reached STR_END. */
3225 if (found != NULL)
3226 JUMPHERE(found);
3227 if (matches != NULL)
3228 set_jumps(matches, LABEL());
3229 JUMPHERE(quit);
3230
3231 if (firstline)
3232 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3233 }
3234
/* Emits a forward search for req_char (or its other case when caseless is
   set) between the current position and STR_END.

   The address at which the character was last found is cached in the local
   slot common->req_char_ptr. The search is skipped when
   STR_PTR + REQ_BYTE_MAX < STR_END, and also when STR_PTR is still below the
   cached address.

   has_firstchar - when set the search starts one character after STR_PTR,
                   otherwise at STR_PTR itself

   Returns the jump that is taken when the search reaches STR_END without a
   hit. It is not bound here, so the caller has to give it a target. */
3235 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3236 {
3237 DEFINE_COMPILER;
3238 struct sljit_label *loop;
3239 struct sljit_jump *toolong;
3240 struct sljit_jump *alreadyfound;
3241 struct sljit_jump *found;
3242 struct sljit_jump *foundoc = NULL;
3243 struct sljit_jump *notfound;
3244 pcre_uint32 oc, bit;
3245
3246 SLJIT_ASSERT(common->req_char_ptr != 0);
/* TMP2 = cached address of the previous hit. */
3247 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3248 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3249 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3250 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3251
/* TMP1 is the scan pointer of the loop below. */
3252 if (has_firstchar)
3253 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3254 else
3255 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3256
3257 loop = LABEL();
3258 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3259
3260 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Work out the other case at compile time; oc stays equal to req_char when
   the search is caseful. */
3261 oc = req_char;
3262 if (caseless)
3263 {
3264 oc = TABLE_GET(req_char, common->fcc, req_char);
3265 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3266 if (req_char > 127 && common->utf)
3267 oc = UCD_OTHERCASE(req_char);
3268 #endif
3269 }
3270 if (req_char == oc)
3271 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3272 else
3273 {
3274 bit = req_char ^ oc;
3275 if (is_powerof2(bit))
3276 {
/* The two cases differ in a single bit: force that bit on and compare once. */
3277 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3278 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3279 }
3280 else
3281 {
/* Otherwise compare against both cases separately. */
3282 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3283 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3284 }
3285 }
3286 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3287 JUMPTO(SLJIT_JUMP, loop);
3288
3289 JUMPHERE(found);
3290 if (foundoc)
3291 JUMPHERE(foundoc);
/* Remember where the character was found for later calls. */
3292 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3293 JUMPHERE(alreadyfound);
3294 JUMPHERE(toolong);
3295 return notfound;
3296 }
3297
/* Emits a fast-call routine (return address in RETURN_ADDR) that walks a
   list of records starting at STACK_TOP and copies the saved words back into
   the local area whose base is loaded into TMP3.

   Each record starts with one word that selects its layout:
     > 0 : local offset; the next two words are stored at base + offset and
           base + offset + one word; the record is three words long
     < 0 : negated local offset; the next word is stored at base + (-value);
           the record is two words long
     = 0 : end of the list, the routine returns

   STACK_TOP itself is not changed; TMP1 is used as the walking pointer. */
3298 static void do_revertframes(compiler_common *common)
3299 {
3300 DEFINE_COMPILER;
3301 struct sljit_jump *jump;
3302 struct sljit_label *mainloop;
3303
3304 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3305 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3306 GET_LOCAL_BASE(TMP3, 0, 0);
3307
3308 /* Drop frames until we reach STACK_TOP. */
3309 mainloop = LABEL();
3310 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Signed compare of the selector word with zero. */
3311 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3312 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3313
/* Positive selector: restore two consecutive words. */
3314 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3315 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3316 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3317 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3318 JUMPTO(SLJIT_JUMP, mainloop);
3319
3320 JUMPHERE(jump);
/* No new compare is emitted here: this jump reuses the flags produced by
   the SUB above to separate the negative case from the zero case. */
3321 jump = JUMP(SLJIT_C_SIG_LESS);
3322 /* End of dropping frames. */
3323 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3324
3325 JUMPHERE(jump);
/* Negative selector: restore a single word. */
3326 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3327 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3328 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3329 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3330 JUMPTO(SLJIT_JUMP, mainloop);
3331 }
3332
/* Emits the fast-call routine for the word boundary test (per the function
   name). The return address is kept in the local slot LOCALS0.

   LOCALS1 receives the word flag (0 or 1) of the character before STR_PTR;
   it stays 0 when STR_PTR is not above the begin pointer of the arguments.
   TMP2 receives the word flag of the character at STR_PTR; it stays 0 when
   the jumps collected by check_str_end are taken (presumably at the end of
   the subject).

   The routine returns right after TMP2 XOR LOCALS1 with the equal flag
   requested, so the caller can branch on whether the two flags differ. */
3333 static void check_wordboundary(compiler_common *common)
3334 {
3335 DEFINE_COMPILER;
3336 struct sljit_jump *skipread;
3337 jump_list *skipread_list = NULL;
3338 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3339 struct sljit_jump *jump;
3340 #endif
3341
/* The non-UCP paths below shift the ctype byte right by 4 to isolate the
   word bit, which is only valid while ctype_word is 0x10. */
3342 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3343
3344 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3345 /* Get type of the previous char, and put it to LOCALS1. */
3346 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3347 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3348 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3349 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back one character and read it again; read_char presumably leaves
   STR_PTR where it was on entry - TODO confirm. */
3350 skip_char_back(common);
3351 check_start_used_ptr(common);
3352 read_char(common);
3353
3354 /* Testing char type. */
3355 #ifdef SUPPORT_UCP
3356 if (common->use_ucp)
3357 {
/* TMP2 starts as 1 and is kept for an underscore. Otherwise the value
   produced by the getucd routine (presumably the character type) is tested
   against the ranges ucp_Ll..ucp_Lu and ucp_Nd..ucp_No. */
3358 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3359 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3360 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3361 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3362 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3363 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3364 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3365 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3366 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3367 JUMPHERE(jump);
3368 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3369 }
3370 else
3371 #endif
3372 {
/* Table lookup: characters above 255 skip the lookup, so LOCALS1 keeps the
   zero stored earlier. */
3373 #ifndef COMPILE_PCRE8
3374 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3375 #elif defined SUPPORT_UTF
3376 /* Here LOCALS1 has already been zeroed. */
3377 jump = NULL;
3378 if (common->utf)
3379 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3380 #endif /* COMPILE_PCRE8 */
3381 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3382 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3383 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3384 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3385 #ifndef COMPILE_PCRE8
3386 JUMPHERE(jump);
3387 #elif defined SUPPORT_UTF
3388 if (jump != NULL)
3389 JUMPHERE(jump);
3390 #endif /* COMPILE_PCRE8 */
3391 }
3392 JUMPHERE(skipread);
3393
/* Now the character at STR_PTR; TMP2 defaults to 0 (not a word character). */
3394 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3395 check_str_end(common, &skipread_list);
3396 peek_char(common);
3397
3398 /* Testing char type. This is a code duplication. */
3399 #ifdef SUPPORT_UCP
3400 if (common->use_ucp)
3401 {
3402 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3403 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3404 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3405 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3406 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3407 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3408 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3409 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3410 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3411 JUMPHERE(jump);
3412 }
3413 else
3414 #endif
3415 {
3416 #ifndef COMPILE_PCRE8
3417 /* TMP2 may be destroyed by peek_char. */
3418 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3419 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3420 #elif defined SUPPORT_UTF
3421 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3422 jump = NULL;
3423 if (common->utf)
3424 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3425 #endif
3426 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3427 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3428 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3429 #ifndef COMPILE_PCRE8
3430 JUMPHERE(jump);
3431 #elif defined SUPPORT_UTF
3432 if (jump != NULL)
3433 JUMPHERE(jump);
3434 #endif /* COMPILE_PCRE8 */
3435 }
3436 set_jumps(skipread_list, LABEL());
3437
/* Combine both sides; the flags of this XOR are the result of the routine. */
3438 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3439 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3440 }
3441
3442 /*
3443 range format:
3444
3445 ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3446 ranges[1] = first bit (0 or 1)
3447 ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)
3448 */
3449
3450 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3451 {
3452 DEFINE_COMPILER;
3453 struct sljit_jump *jump;
3454
3455 if (ranges[0] < 0)
3456 return FALSE;
3457
3458 switch(ranges[0])
3459 {
3460 case 1:
3461 if (readch)
3462 read_char(common);
3463 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3464 return TRUE;
3465
3466 case 2:
3467 if (readch)
3468 read_char(common);
3469 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3470 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3471 return TRUE;
3472
3473 case 4:
3474 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3475 {
3476 if (readch)
3477 read_char(common);
3478 if (ranges[1] != 0)
3479 {
3480 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3481 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3482 }
3483 else
3484 {
3485 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3486 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3487 JUMPHERE(jump);
3488 }
3489 return TRUE;
3490 }
3491 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3492 {
3493 if (readch)
3494 read_char(common);
3495 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3496 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3497 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3498 return TRUE;
3499 }
3500 return FALSE;
3501
3502 default:
3503 return FALSE;
3504 }
3505 }
3506
3507 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3508 {
3509 int i, bit, length;
3510 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3511
3512 bit = ctypes[0] & flag;
3513 ranges[0] = -1;
3514 ranges[1] = bit != 0 ? 1 : 0;
3515 length = 0;
3516
3517 for (i = 1; i < 256; i++)
3518 if ((ctypes[i] & flag) != bit)
3519 {
3520 if (length >= MAX_RANGE_SIZE)
3521 return;
3522 ranges[2 + length] = i;
3523 length++;
3524 bit ^= flag;
3525 }
3526
3527 if (bit != 0)
3528 {
3529 if (length >= MAX_RANGE_SIZE)
3530 return;
3531 ranges[2 + length] = 256;
3532 length++;
3533 }
3534 ranges[0] = length;
3535 }
3536
3537 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3538 {
3539 int ranges[2 + MAX_RANGE_SIZE];
3540 pcre_uint8 bit, cbit, all;
3541 int i, byte, length = 0;
3542
3543 bit = bits[0] & 0x1;
3544 ranges[1] = bit;
3545 /* Can be 0 or 255. */
3546 all = -bit;
3547
3548 for (i = 0; i < 256; )
3549 {
3550 byte = i >> 3;
3551 if ((i & 0x7) == 0 && bits[byte] == all)
3552 i += 8;
3553 else
3554 {
3555 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3556 if (cbit != bit)
3557 {
3558 if (length >= MAX_RANGE_SIZE)
3559 return FALSE;
3560 ranges[2 + length] = i;
3561 length++;
3562 bit = cbit;
3563 all = -cbit;
3564 }
3565 i++;
3566 }
3567 }
3568
3569 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3570 {
3571 if (length >= MAX_RANGE_SIZE)
3572 return FALSE;
3573 ranges[2 + length] = 256;
3574 length++;
3575 }
3576 ranges[0] = length;
3577
3578 return check_ranges(common, ranges, backtracks, FALSE);
3579 }
3580
/* Emits the fast-call routine (return address in RETURN_ADDR) that tests the
   character in TMP1 against 0x0a..0x0d and 0x85, and additionally against
   0x2028 / 0x2029 in the 16 and 32 bit libraries or in 8 bit UTF mode.
   The result is accumulated in TMP2, and the last OP_FLAGS requests the
   equal flag so the caller can branch on it. TMP1 is modified as well. */
3581 static void check_anynewline(compiler_common *common)
3582 {
3583 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3584 DEFINE_COMPILER;
3585
3586 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3587
/* From here on TMP1 holds the character minus 0x0a; all later constants
   are biased accordingly. */
3588 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3589 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3590 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3591 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3592 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3593 #ifdef COMPILE_PCRE8
3594 if (common->utf)
3595 {
3596 #endif
/* Record the 0x85 result, then set the lowest bit so that one compare with
   the biased 0x2029 covers two neighbouring code points. */
3597 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3598 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3599 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3600 #ifdef COMPILE_PCRE8
3601 }
3602 #endif
3603 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last compare that was emitted above. */
3604 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3605 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3606 }
3607
/* Emits the fast-call routine (return address in RETURN_ADDR) that tests the
   character in TMP1 against 0x09, 0x20 and 0xa0, and additionally against
   0x1680, 0x180e, 0x2000..0x200a, 0x202f, 0x205f and 0x3000 in the 16 and
   32 bit libraries or in 8 bit UTF mode. The result is accumulated in TMP2,
   and the last OP_FLAGS requests the equal flag for the caller. */
3608 static void check_hspace(compiler_common *common)
3609 {
3610 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
3611 DEFINE_COMPILER;
3612
3613 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3614
3615 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3616 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3617 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3618 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3619 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3620 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3621 #ifdef COMPILE_PCRE8
3622 if (common->utf)
3623 {
3624 #endif
3625 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3626 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3627 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3628 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3629 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* From here on TMP1 holds the character minus 0x2000; the remaining
   constants are biased accordingly. */
3630 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3631 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3632 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3633 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3634 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3635 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3636 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3637 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3638 #ifdef COMPILE_PCRE8
3639 }
3640 #endif
3641 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last compare that was emitted above. */
3642 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3643
3644 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3645 }
3646
/* Emits the fast-call routine (return address in RETURN_ADDR) that tests the
   character in TMP1 against 0x0a..0x0d and 0x85, and additionally against
   0x2028 / 0x2029 in the 16 and 32 bit libraries or in 8 bit UTF mode.
   The result is accumulated in TMP2, and the last OP_FLAGS requests the
   equal flag for the caller. TMP1 is modified as well. */
3647 static void check_vspace(compiler_common *common)
3648 {
3649 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
3650 DEFINE_COMPILER;
3651
3652 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3653
/* From here on TMP1 holds the character minus 0x0a; all later constants
   are biased accordingly. */
3654 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3655 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3656 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3657 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3658 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3659 #ifdef COMPILE_PCRE8
3660 if (common->utf)
3661 {
3662 #endif
/* Record the 0x85 result, then set the lowest bit so that one compare with
   the biased 0x2029 covers two neighbouring code points. */
3663 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3664 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3665 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3666 #ifdef COMPILE_PCRE8
3667 }
3668 #endif
3669 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last compare that was emitted above. */
3670 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3671
3672 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3673 }
3674
3675 #define CHAR1 STR_END
3676 #define CHAR2 STACK_TOP
3677
/* Emits the fast-call routine (return address in RETURN_ADDR) for a caseful
   comparison of two memory areas.

   On entry TMP1 addresses the first area and TMP2 holds its length in bytes.
   The second area is in the subject: STR_PTR is first moved back by TMP2.
   CHAR1 and CHAR2 (the STR_END and STACK_TOP registers while these macros
   are defined) are used as scratch; their values are saved in TMP3 and
   LOCALS0 and restored before returning.

   The loop ends at the first differing code unit or when TMP2 reaches zero.
   NOTE(review): callers presumably branch on the flags left by the loop
   exit - confirm at the call sites. */
3678 static void do_casefulcmp(compiler_common *common)
3679 {
3680 DEFINE_COMPILER;
3681 struct sljit_jump *jump;
3682 struct sljit_label *label;
3683
3684 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3685 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3686 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3687 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers start one code unit early because the loads below use a
   displacement of one code unit (MOVU presumably also advances the base
   register by that displacement - TODO confirm). */
3688 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3689 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3690
3691 label = LABEL();
3692 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3693 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3694 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3695 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3696 JUMPTO(SLJIT_C_NOT_ZERO, label);
3697
3698 JUMPHERE(jump);
/* Undo the initial one code unit bias of STR_PTR and restore the registers
   borrowed as CHAR1 / CHAR2. */
3699 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3700 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3701 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3702 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3703 }
3704
3705 #define LCC_TABLE STACK_LIMIT
3706
/* Emits the fast-call routine (return address in RETURN_ADDR) for a caseless
   comparison of two memory areas, using the common->lcc table.

   On entry TMP1 addresses the first area and TMP2 holds its length in bytes.
   The second area is in the subject: STR_PTR is first moved back by TMP2.
   Three registers are borrowed and restored before returning: LCC_TABLE
   (the STACK_LIMIT register, saved in TMP3), CHAR1 and CHAR2 (saved in
   LOCALS0 and LOCALS1).

   Each pair of code units is mapped through the table before it is
   compared. In the 16 and 32 bit libraries values above 255 are compared
   unmapped. */
3707 static void do_caselesscmp(compiler_common *common)
3708 {
3709 DEFINE_COMPILER;
3710 struct sljit_jump *jump;
3711 struct sljit_label *label;
3712
3713 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3714 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3715
3716 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3717 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3718 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3719 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers start one code unit early because the loads below use a
   displacement of one code unit (MOVU presumably also advances the base
   register by that displacement - TODO confirm). */
3720 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3721 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3722
3723 label = LABEL();
3724 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3725 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* The table lookup is skipped for values above 255. */
3726 #ifndef COMPILE_PCRE8
3727 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3728 #endif
3729 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3730 #ifndef COMPILE_PCRE8
3731 JUMPHERE(jump);
3732 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3733 #endif
3734 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3735 #ifndef COMPILE_PCRE8
3736 JUMPHERE(jump);
3737 #endif
3738 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3739 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3740 JUMPTO(SLJIT_C_NOT_ZERO, label);
3741
3742 JUMPHERE(jump);
/* Undo the initial one code unit bias of STR_PTR and restore the borrowed
   registers. */
3743 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3744 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3745 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3746 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3747 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3748 }
3749
3750 #undef LCC_TABLE
3751 #undef CHAR1
3752 #undef CHAR2
3753
3754 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3755
3756 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3757 {
3758 /* This function would be ineffective to do in JIT level. */
3759 pcre_uint32 c1, c2;
3760 const pcre_uchar *src2 = args->uchar_ptr;
3761 const pcre_uchar *end2 = args->end;
3762 const ucd_record *ur;
3763 const pcre_uint32 *pp;
3764
3765 while (src1 < end1)
3766 {
3767 if (src2 >= end2)
3768 return (pcre_uchar*)1;
3769 GETCHARINC(c1, src1);
3770 GETCHARINC(c2, src2);
3771 ur = GET_UCD(c2);
3772 if (c1 != c2 && c1 != c2 + ur->other_case)
3773 {
3774 pp = PRIV(ucd_caseless_sets) + ur->caseset;
3775 for (;;)
3776 {
3777 if (c1 < *pp) return NULL;
3778 if (c1 == *pp++) break;
3779 }
3780 }
3781 }
3782 return src2;
3783 }
3784
3785 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3786
/* Emits the comparison of one pattern character (every code unit of it in
   UTF mode) against the subject, and returns cc advanced past the character.

   The subject is addressed with negative displacements from STR_PTR:
   context->length is the number of bytes that still have to be compared and
   every load reads from STR_PTR - context->length. A mismatch adds a jump to
   "backtracks".

   caseless - when set and the character has another case, the differing bit
              is ORed into the loaded data (and into the expected value)
              before the comparison
   context  - running state shared by consecutive calls: the remaining
              length, the register that receives the next load (sourcereg,
              -1 before the first call) and, on the unaligned path, the code
              units collected so far (c, oc, ucharptr) */
3787 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3788 compare_context* context, jump_list **backtracks)
3789 {
3790 DEFINE_COMPILER;
3791 unsigned int othercasebit = 0;
3792 pcre_uchar *othercasechar = NULL;
3793 #ifdef SUPPORT_UTF
3794 int utflength;
3795 #endif
3796
3797 if (caseless && char_has_othercase(common, cc))
3798 {
3799 othercasebit = char_get_othercase_bit(common, cc);
3800 SLJIT_ASSERT(othercasebit);
3801 /* Extracting bit difference info. */
3802 #if defined COMPILE_PCRE8
/* The bits above the low byte select the code unit of the character that
   carries the difference; the low byte is the bit mask itself. */
3803 othercasechar = cc + (othercasebit >> 8);
3804 othercasebit &= 0xff;
3805 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3806 /* Note that this code only handles characters in the BMP. If there
3807 ever are characters outside the BMP whose othercase differs in only one
3808 bit from itself (there currently are none), this code will need to be
3809 revised for COMPILE_PCRE32. */
/* Here the code unit index starts at bit 9, and bit 0x100 tells that the
   8 bit mask belongs to the upper byte of the code unit. */
3810 othercasechar = cc + (othercasebit >> 9);
3811 if ((othercasebit & 0x100) != 0)
3812 othercasebit = (othercasebit & 0xff) << 8;
3813 else
3814 othercasebit &= 0xff;
3815 #endif /* COMPILE_PCRE[8|16|32] */
3816 }
3817
/* First call for this sequence: issue the initial load into TMP1. TMP2
   becomes the target of the next load. */
3818 if (context->sourcereg == -1)
3819 {
3820 #if defined COMPILE_PCRE8
3821 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3822 if (context->length >= 4)
3823 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3824 else if (context->length >= 2)
3825 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3826 else
3827 #endif
3828 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3829 #elif defined COMPILE_PCRE16
3830 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3831 if (context->length >= 4)
3832 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3833 else
3834 #endif
3835 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3836 #elif defined COMPILE_PCRE32
3837 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3838 #endif /* COMPILE_PCRE[8|16|32] */
3839 context->sourcereg = TMP2;
3840 }
3841
/* In UTF mode the loop below runs once for every code unit of the
   character; without SUPPORT_UTF the body is emitted exactly once. */
3842 #ifdef SUPPORT_UTF
3843 utflength = 1;
3844 if (common->utf && HAS_EXTRALEN(*cc))
3845 utflength += GET_EXTRALEN(*cc);
3846
3847 do
3848 {
3849 #endif
3850
3851 context->length -= IN_UCHARS(1);
3852 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3853
3854 /* Unaligned read is supported. */
/* Collect the expected value in c and the case mask in oc. */
3855 if (othercasebit != 0 && othercasechar == cc)
3856 {
3857 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3858 context->oc.asuchars[context->ucharptr] = othercasebit;
3859 }
3860 else
3861 {
3862 context->c.asuchars[context->ucharptr] = *cc;
3863 context->oc.asuchars[context->ucharptr] = 0;
3864 }
3865 context->ucharptr++;
3866
/* Emit a comparison once enough code units are collected for the load that
   was issued earlier, or when nothing is left to compare. */
3867 #if defined COMPILE_PCRE8
3868 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3869 #else
3870 if (context->ucharptr >= 2 || context->length == 0)
3871 #endif
3872 {
/* Start the load of the next chunk first, then switch sourcereg to the
   register that was filled by the previous load and compare that one. */
3873 if (context->length >= 4)
3874 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3875 else if (context->length >= 2)
3876 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3877 #if defined COMPILE_PCRE8
3878 else if (context->length >= 1)
3879 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3880 #endif /* COMPILE_PCRE8 */
3881 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3882
/* The case labels are expressed in code units: 4 and 2 bytes worth of
   pcre_uchar, plus a single byte in the 8 bit library. */
3883 switch(context->ucharptr)
3884 {
3885 case 4 / sizeof(pcre_uchar):
3886 if (context->oc.asint != 0)
3887 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3888 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3889 break;
3890
3891 case 2 / sizeof(pcre_uchar):
3892 if (context->oc.asushort != 0)
3893 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3894 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3895 break;
3896
3897 #ifdef COMPILE_PCRE8
3898 case 1:
3899 if (context->oc.asbyte != 0)
3900 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3901 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3902 break;
3903 #endif
3904
3905 default:
3906 SLJIT_ASSERT_STOP();
3907 break;
3908 }
3909 context->ucharptr = 0;
3910 }
3911
3912 #else
3913
3914 /* Unaligned read is unsupported or in 32 bit mode. */
/* One code unit per comparison: load the next unit, then compare the unit
   that was loaded by the previous step. */
3915 if (context->length >= 1)
3916 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3917
3918 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3919
3920 if (othercasebit != 0 && othercasechar == cc)
3921 {
3922 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3923 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3924 }
3925 else
3926 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3927
3928 #endif
3929
3930 cc++;
3931 #ifdef SUPPORT_UTF
3932 utflength--;
3933 }
3934 while (utflength > 0);
3935 #endif
3936
3937 return cc;
3938 }
3939
3940 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3941
/* Re-biases the register named by the local variable typereg: code is
   emitted that subtracts (value - typeoffset) from it, or adds the difference
   when value is smaller, and typeoffset is updated to value. Nothing is
   emitted when the bias is already equal to value.

   The macro relies on typereg and typeoffset being in scope at the point of
   use. It expands to an if statement followed by a separate assignment, so
   it must not be used as the unbraced body of another if / else / loop. */
3942 #define SET_TYPE_OFFSET(value) \
3943 if ((value) != typeoffset) \
3944 { \
3945 if ((value) > typeoffset) \
3946 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
3947 else \
3948 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
3949 } \
3950 typeoffset = (value);
3951
/* Same as SET_TYPE_OFFSET, but for the character held in TMP1 and the bias
   tracked in the local variable charoffset. The same usage restrictions
   apply: charoffset must be in scope and the expansion is two statements. */
3952 #define SET_CHAR_OFFSET(value) \
3953 if ((value) != charoffset) \
3954 { \
3955 if ((value) > charoffset) \
3956 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
3957 else \
3958 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
3959 } \
3960 charoffset = (value);
3961
3962 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3963 {
3964 DEFINE_COMPILER;
3965 jump_list *found = NULL;
3966 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3967 pcre_int32 c, charoffset;
3968 const pcre_uint32 *other_cases;
3969 struct sljit_jump *jump = NULL;
3970 pcre_uchar *ccbegin;
3971 int compares, invertcmp, numberofcmps;
3972 #ifdef SUPPORT_UCP
3973 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3974 BOOL charsaved = FALSE;
3975 int typereg = TMP1, scriptreg = TMP1;
3976 pcre_int32 typeoffset;
3977 #endif
3978
3979 /* Although SUPPORT_UTF must be defined, we are
3980 not necessarily in utf mode even in 8 bit mode. */
3981 detect_partial_match(common, backtracks);
3982 read_char(common);
3983
3984 if ((*cc++ & XCL_MAP) != 0)
3985 {
3986 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3987 #ifndef COMPILE_PCRE8
3988 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3989 #elif defined SUPPORT_UTF
3990 if (common->utf)
3991 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3992 #endif
3993
3994 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3995 {
3996 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3997 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3998 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
3999 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4000 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4001 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4002 }
4003
4004 #ifndef COMPILE_PCRE8
4005 JUMPHERE(jump);
4006 #elif defined SUPPORT_UTF
4007 if (common->utf)
4008 JUMPHERE(jump);
4009 #endif
4010 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4011 #ifdef SUPPORT_UCP
4012 charsaved = TRUE;
4013 #endif
4014 cc += 32 / sizeof(pcre_uchar);
4015 }
4016
4017 /* Scanning the necessary info. */
4018 ccbegin = cc;
4019 compares = 0;
4020 while (*cc != XCL_END)
4021 {
4022 compares++;
4023 if (*cc == XCL_SINGLE)
4024 {
4025 cc += 2;
4026 #ifdef SUPPORT_UTF
4027 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4028 #endif
4029 #ifdef SUPPORT_UCP
4030 needschar = TRUE;
4031 #endif
4032 }
4033 else if (*cc == XCL_RANGE)
4034 {
4035 cc += 2;
4036 #ifdef SUPPORT_UTF
4037 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4038 #endif
4039 cc++;
4040 #ifdef SUPPORT_UTF
4041 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4042 #endif
4043 #ifdef SUPPORT_UCP
4044 needschar = TRUE;
4045 #endif
4046 }
4047 #ifdef SUPPORT_UCP
4048 else
4049 {
4050 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4051 cc++;
4052 switch(*cc)
4053 {
4054 case PT_ANY:
4055 break;
4056
4057 case PT_LAMP:
4058 case PT_GC:
4059 case PT_PC:
4060 case PT_ALNUM:
4061 needstype = TRUE;
4062 break;
4063
4064 case PT_SC:
4065 needsscript = TRUE;
4066 break;
4067
4068 case PT_SPACE:
4069 case PT_PXSPACE:
4070 case PT_WORD:
4071 needstype = TRUE;
4072 needschar = TRUE;
4073 break;
4074
4075 case PT_CLIST:
4076 case PT_UCNC:
4077 needschar = TRUE;
4078 break;
4079
4080 default:
4081 SLJIT_ASSERT_STOP();
4082 break;
4083 }
4084 cc += 2;
4085 }
4086 #endif
4087 }
4088
4089 #ifdef SUPPORT_UCP
4090 /* Simple register allocation. TMP1 is preferred if possible. */
4091 if (needstype || needsscript)
4092 {
4093 if (needschar && !charsaved)
4094 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4095 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4096 if (needschar)
4097 {
4098 if (needstype)
4099 {
4100 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4101 typereg = RETURN_ADDR;
4102 }
4103
4104 if (needsscript)
4105 scriptreg = TMP3;
4106 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4107 }
4108 else if (needstype && needsscript)
4109 scriptreg = TMP3;
4110 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4111
4112 if (needsscript)
4113 {
4114 if (scriptreg == TMP1)
4115 {
4116 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4117 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4118 }
4119 else
4120 {
4121 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4122 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4123 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4124 }
4125 }
4126 }
4127 #endif
4128
4129 /* Generating code. */
4130 cc = ccbegin;
4131 charoffset = 0;
4132 numberofcmps = 0;
4133 #ifdef SUPPORT_UCP
4134 typeoffset = 0;
4135 #endif
4136
4137 while (*cc != XCL_END)
4138 {
4139 compares--;
4140 invertcmp = (compares == 0 && list != backtracks);
4141 jump = NULL;
4142
4143 if (*cc == XCL_SINGLE)
4144 {
4145 cc ++;
4146 #ifdef SUPPORT_UTF
4147 if (common->utf)
4148 {
4149 GETCHARINC(c, cc);
4150 }
4151 else
4152 #endif
4153 c = *cc++;
4154
4155 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4156 {
4157 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4158 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4159 numberofcmps++;
4160 }
4161 else if (numberofcmps > 0)
4162 {
4163 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4164 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4165 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4166 numberofcmps = 0;
4167 }
4168 else
4169 {
4170 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4171 numberofcmps = 0;
4172 }
4173 }
4174 else if (*cc == XCL_RANGE)
4175 {
4176 cc ++;
4177 #ifdef SUPPORT_UTF
4178 if (common->utf)
4179 {
4180 GETCHARINC(c, cc);
4181 }
4182 else
4183 #endif
4184 c = *cc++;
4185 SET_CHAR_OFFSET(c);
4186 #ifdef SUPPORT_UTF
4187 if (common->utf)
4188 {
4189 GETCHARINC(c, cc);
4190 }
4191 else
4192 #endif
4193 c = *cc++;
4194 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4195 {
4196 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4197 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4198 numberofcmps++;
4199 }
4200 else if (numberofcmps > 0)
4201 {
4202 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4203 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4204 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4205 numberofcmps = 0;
4206 }
4207 else
4208 {
4209 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4210 numberofcmps = 0;
4211 }
4212 }
4213 #ifdef SUPPORT_UCP
4214 else
4215 {
4216 if (*cc == XCL_NOTPROP)
4217 invertcmp ^= 0x1;
4218 cc++;
4219 switch(*cc)
4220 {
4221 case PT_ANY:
4222 if (list != backtracks)
4223 {
4224 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4225 continue;
4226 }
4227 else if (cc[-1] == XCL_NOTPROP)
4228 continue;
4229 jump = JUMP(SLJIT_JUMP);
4230 break;
4231
4232 case PT_LAMP:
4233 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4234 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4235 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4236 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4237 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4238 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4239 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4240 break;
4241
4242 case PT_GC:
4243 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4244 SET_TYPE_OFFSET(c);
4245 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4246 break;
4247
4248 case PT_PC:
4249 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4250 break;
4251
4252 case PT_SC:
4253 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4254 break;
4255
4256 case PT_SPACE:
4257 case PT_PXSPACE:
4258 if (*cc == PT_SPACE)
4259 {
4260 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4261 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4262 }
4263 SET_CHAR_OFFSET(9);
4264 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4265 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4266 if (*cc == PT_SPACE)
4267 JUMPHERE(jump);
4268
4269 SET_TYPE_OFFSET(ucp_Zl);
4270 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4271 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4272 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4273 break;
4274
4275 case PT_WORD:
4276 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4277 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4278 /* Fall through. */
4279
4280 case PT_ALNUM:
4281 SET_TYPE_OFFSET(ucp_Ll);
4282 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4283 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4284 SET_TYPE_OFFSET(ucp_Nd);
4285 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4286 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4287 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4288 break;
4289
4290 case PT_CLIST:
4291 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4292
4293 /* At least three characters are required.
4294 Otherwise this case would be handled by the normal code path. */
4295 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4296 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4297
4298 /* Optimizing character pairs, if their difference is power of 2. */
4299 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4300 {
4301 if (charoffset == 0)
4302 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4303 else
4304 {
4305 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4306 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4307 }
4308 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4309 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4310 other_cases += 2;
4311 }
4312 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4313 {
4314 if (charoffset == 0)
4315 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4316 else
4317 {
4318 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4319 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4320 }
4321 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4322 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4323
4324 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4325 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4326
4327 other_cases += 3;
4328 }
4329 else
4330 {
4331 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4332 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4333 }
4334
4335 while (*other_cases != NOTACHAR)
4336 {
4337 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4338 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4339 }
4340 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4341 break;
4342
4343 case PT_UCNC:
4344 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4345 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4346 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4347 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4348 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4349 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4350
4351 SET_CHAR_OFFSET(0xa0);
4352 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4353 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4354 SET_CHAR_OFFSET(0);
4355 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4356 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4357 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4358 break;
4359 }
4360 cc += 2;
4361 }
4362 #endif
4363
4364 if (jump != NULL)
4365 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4366 }
4367
4368 if (found != NULL)
4369 set_jumps(found, LABEL());
4370 }
4371
4372 #undef SET_TYPE_OFFSET
4373 #undef SET_CHAR_OFFSET
4374
4375 #endif
4376
/* Emits the matching-path machine code for a single opcode.

Arguments:
  common      compiler state; DEFINE_COMPILER takes the sljit compiler from it,
              and the utf, mode, nltype, newline, endonly and digits fields
              are consulted below
  type        the opcode to compile
  cc          the opcode's operand data (compile_charn_matchingpath passes
              *cc as type and cc + 1 here, i.e. the unit after the opcode)
  backtracks  jump list that collects every jump taken when the emitted
              test fails

Returns: the code pointer after the operand data consumed by this opcode
(cc itself for opcodes that have no operands). An opcode that is not handled
by the switch reaches SLJIT_ASSERT_STOP(). */
4377 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4378 {
4379 DEFINE_COMPILER;
4380 int length;
4381 unsigned int c, oc, bit;
4382 compare_context context;
4383 struct sljit_jump *jump[4];
4384 jump_list *end_list;
4385 #ifdef SUPPORT_UTF
4386 struct sljit_label *label;
4387 #ifdef SUPPORT_UCP
4388 pcre_uchar propdata[5];
4389 #endif
4390 #endif
4391
4392 switch(type)
4393 {
4394 case OP_SOD:
/* Fail unless STR_PTR equals the begin field of jit_arguments. */
4395 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4396 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4397 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4398 return cc;
4399
4400 case OP_SOM:
/* Same test as OP_SOD, but against the str field of jit_arguments. */
4401 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4402 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4403 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4404 return cc;
4405
4406 case OP_NOT_WORD_BOUNDARY:
4407 case OP_WORD_BOUNDARY:
/* The shared wordboundary routine is reached through a fast call; the jump
below tests the zero / non-zero flag state it leaves behind.
NOTE(review): presumably non-zero means "boundary found" - confirm against
the wordboundary routine. */
4408 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4409 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4410 return cc;
4411
4412 case OP_NOT_DIGIT:
4413 case OP_DIGIT:
4414 /* Digits are usually 0-9, so it is worth to optimize them. */
/* digits[0] == -2 acts here as the "ranges not computed yet" marker. */
4415 if (common->digits[0] == -2)
4416 get_ctype_ranges(common, ctype_digit, common->digits);
4417 detect_partial_match(common, backtracks);
4418 /* Flip the starting bit in the negative case. */
4419 if (type == OP_NOT_DIGIT)
4420 common->digits[1] ^= 1;
/* When check_ranges() returns false, fall back to testing the ctype_digit
bit of the value produced by read_char8_type(). */
4421 if (!check_ranges(common, common->digits, backtracks, TRUE))
4422 {
4423 read_char8_type(common);
4424 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4425 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4426 }
/* Undo the flip above so that common->digits is left unchanged. */
4427 if (type == OP_NOT_DIGIT)
4428 common->digits[1] ^= 1;
4429 return cc;
4430
4431 case OP_NOT_WHITESPACE:
4432 case OP_WHITESPACE:
/* Test the ctype_space bit of TMP1 after read_char8_type(). */
4433 detect_partial_match(common, backtracks);
4434 read_char8_type(common);
4435 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4436 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4437 return cc;
4438
4439 case OP_NOT_WORDCHAR:
4440 case OP_WORDCHAR:
/* Test the ctype_word bit of TMP1 after read_char8_type(). */
4441 detect_partial_match(common, backtracks);
4442 read_char8_type(common);
4443 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4444 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4445 return cc;
4446
4447 case OP_ANY:
4448 detect_partial_match(common, backtracks);
4449 read_char(common);
4450 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4451 {
/* Fixed newline stored as two 8-bit halves: bits 8-15 are compared with
the unit just read, bits 0-7 with the unit that follows it. */
/* jump[0]: the unit just read is not the first half, so it is accepted. */
4452 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4453 end_list = NULL;
/* end_list: no following unit is available, so the load below is skipped. */
4454 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4455 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4456 else
4457 check_str_end(common, &end_list);
4458
/* Load the next unit without advancing STR_PTR; fail if it is the second
half of the newline. */
4459 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4460 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4461 set_jumps(end_list, LABEL());
4462 JUMPHERE(jump[0]);
4463 }
4464 else
4465 check_newlinechar(common, common->nltype, backtracks, TRUE);
4466 return cc;
4467
4468 case OP_ALLANY:
4469 detect_partial_match(common, backtracks);
4470 #ifdef SUPPORT_UTF
4471 if (common->utf)
4472 {
/* Step over the first code unit, then over any units that follow it in
the same character. */
4473 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4474 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4475 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4476 #if defined COMPILE_PCRE8
/* Units below 0xc0 need no extra skip; otherwise the extra amount is
loaded from utf8_table4, indexed by (unit - 0xc0). */
4477 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4478 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4479 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4480 #elif defined COMPILE_PCRE16
/* Units below 0xd800 need no extra skip. Otherwise TMP1 becomes 1 when
(unit & 0xfc00) == 0xd800 and 0 if not; it is shifted left by one and added
to STR_PTR. */
4481 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4482 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4483 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4484 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4485 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4486 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4487 #endif
4488 JUMPHERE(jump[0]);
4489 #endif /* COMPILE_PCRE[8|16] */
4490 return cc;
4491 }
4492 #endif
/* Non-UTF: every character is exactly one code unit. */
4493 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4494 return cc;
4495
4496 case OP_ANYBYTE:
/* Always advances by exactly one code unit, regardless of common->utf. */
4497 detect_partial_match(common, backtracks);
4498 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4499 return cc;
4500
4501 #ifdef SUPPORT_UTF
4502 #ifdef SUPPORT_UCP
4503 case OP_NOTPROP:
4504 case OP_PROP:
/* Build a temporary extended-class item list that holds one property item
(XCL_PROP or XCL_NOTPROP plus the two operand units) followed by XCL_END,
and hand it to the xclass compiler.
NOTE(review): propdata[0] = 0 is presumably the xclass flags unit - confirm
against compile_xclass_matchingpath. */
4505 propdata[0] = 0;
4506 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4507 propdata[2] = cc[0];
4508 propdata[3] = cc[1];
4509 propdata[4] = XCL_END;
4510 compile_xclass_matchingpath(common, propdata, backtracks);
4511 return cc + 2;
4512 #endif
4513 #endif
4514
4515 case OP_ANYNL:
4516 detect_partial_match(common, backtracks);
4517 read_char(common);
/* jump[0]: the character is not CR, it is checked by check_newlinechar(). */
4518 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4519 /* We don't need to handle soft partial matching case. */
4520 end_list = NULL;
4521 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4522 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4523 else
4524 check_str_end(common, &end_list);
/* CR was read: look at the next unit and consume it only when it is NL
(jump[1] skips the increment otherwise). */
4525 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4526 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4527 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4528 jump[2] = JUMP(SLJIT_JUMP);
4529 JUMPHERE(jump[0]);
4530 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4531 set_jumps(end_list, LABEL());
4532 JUMPHERE(jump[1]);
4533 JUMPHERE(jump[2]);
4534 return cc;
4535
4536 case OP_NOT_HSPACE:
4537 case OP_HSPACE:
/* Read a character, call the shared hspace routine and test the zero /
non-zero flag state it leaves behind. */
4538 detect_partial_match(common, backtracks);
4539 read_char(common);
4540 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4541 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4542 return cc;
4543
4544 case OP_NOT_VSPACE:
4545 case OP_VSPACE:
/* Same scheme as OP_HSPACE, using the shared vspace routine. */
4546 detect_partial_match(common, backtracks);
4547 read_char(common);
4548 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4549 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4550 return cc;
4551
4552 #ifdef SUPPORT_UCP
4553 case OP_EXTUNI:
4554 detect_partial_match(common, backtracks);
4555 read_char(common);
4556 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4557 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4558 /* Optimize register allocation: use a real register. */
/* STACK_TOP is saved in LOCALS0 and reused to hold the gbprop value of the
previous character; it is restored after the loop. */
4559 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4560 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4561
/* Loop: remember STR_PTR in TMP3, read the next character and load its
gbprop into TMP2. */
4562 label = LABEL();
4563 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4564 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4565 read_char(common);
4566 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4567 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4568 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4569
/* TMP1 = ucp_gbtable[previous gbprop] (32-bit entries, hence the shift by
2). The loop continues while bit (1 << current gbprop) is set in it; the
current gbprop becomes the previous one for the next round. */
4570 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4571 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4572 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4573 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4574 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4575 JUMPTO(SLJIT_C_NOT_ZERO, label);
4576
/* The last character read is not part of the sequence: step back to the
position saved in TMP3. */
4577 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4578 JUMPHERE(jump[0]);
4579 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4580
4581 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4582 {
4583 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4584 /* Since we successfully read a char above, partial matching must occur. */
4585 check_partial(common, TRUE);
4586 JUMPHERE(jump[0]);
4587 }
4588 return cc;
4589 #endif
4590
4591 case OP_EODN:
4592 /* Requires rather complex checks. */
/* jump[0]: STR_PTR is already at (or past) STR_END; only the final
check_partial() call at the bottom of this case is emitted for that path. */
4593 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4594 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4595 {
/* Two-unit fixed newline: TMP2 = STR_PTR + 2 units, TMP1 = current unit. */
4596 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4597 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4598 if (common->mode == JIT_COMPILE)
4599 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4600 else
4601 {
/* Partial modes. jump[1]: exactly two units remain, go to the full
comparison. Otherwise fail when TMP2 < STR_END or when the current unit is
not the first newline half; what is left (a single remaining unit equal to
the first half) goes through check_partial() and then fails. */
4602 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4603 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4604 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4605 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4606 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4607 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4608 check_partial(common, TRUE);
4609 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4610 JUMPHERE(jump[1]);
4611 }
/* Both remaining units must equal the two newline halves. */
4612 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4613 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4614 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4615 }
4616 else if (common->nltype == NLTYPE_FIXED)
4617 {
/* One-unit fixed newline: exactly one unit must remain and it must be the
newline. */
4618 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4619 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4620 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4621 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4622 }
4623 else
4624 {
/* Remaining newline types. jump[1]: the current unit is not CR. For CR,
STR_PTR + 2 units is compared with STR_END: greater (CR is the last unit)
succeeds through jump[2], less fails, equal requires the second unit to be
NL (jump[3]). */
4625 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4626 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4627 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4628 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4629 jump[2] = JUMP(SLJIT_C_GREATER);
4630 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4631 /* Equal. */
4632 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4633 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4634 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4635
4636 JUMPHERE(jump[1]);
4637 if (common->nltype == NLTYPE_ANYCRLF)
4638 {
/* Not CR: it must be the last unit of the subject and equal to NL. */
4639 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4640 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4641 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4642 }
4643 else
4644 {
/* STR_PTR is saved in LOCALS1 because read_char() advances it. After the
read STR_PTR must equal STR_END and the anynewline routine must report a
newline; STR_PTR is then restored. */
4645 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4646 read_char(common);
4647 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4648 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4649 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4650 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4651 }
4652 JUMPHERE(jump[2]);
4653 JUMPHERE(jump[3]);
4654 }
4655 JUMPHERE(jump[0]);
4656 check_partial(common, FALSE);
4657 return cc;
4658
4659 case OP_EOD:
/* Fail while subject data remains. */
4660 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4661 check_partial(common, FALSE);
4662 return cc;
4663
4664 case OP_CIRC:
/* Fail when STR_PTR is beyond jit_arguments.begin or when the notbol field
is non-zero. */
4665 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4666 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4667 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4668 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4669 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4670 return cc;
4671
4672 case OP_CIRCM:
/* jump[1]: STR_PTR is beyond begin, so the preceding-newline test applies.
Otherwise only notbol is checked and jump[0] skips the newline test. */
4673 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4674 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4675 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4676 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4677 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4678 jump[0] = JUMP(SLJIT_JUMP);
4679 JUMPHERE(jump[1]);
4680
/* Fails when STR_PTR has reached STR_END. */
4681 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4682 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4683 {
/* Two-unit fixed newline: both preceding units must lie at or after begin
(still in TMP1) and must equal the two newline halves. */
4684 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4685 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4686 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4687 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4688 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4689 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4690 }
4691 else
4692 {
/* Step back one character, read it again and require it to be a newline. */
4693 skip_char_back(common);
4694 read_char(common);
4695 check_newlinechar(common, common->nltype, backtracks, FALSE);
4696 }
4697 JUMPHERE(jump[0]);
4698 return cc;
4699
4700 case OP_DOLL:
/* Fail when the noteol field of jit_arguments is non-zero. */
4701 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4702 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4703 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4704
/* Without endonly the rest is identical to OP_EODN, so that case is reused
recursively; with endonly the test is the same as OP_EOD. */
4705 if (!common->endonly)
4706 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4707 else
4708 {
4709 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4710 check_partial(common, FALSE);
4711 }
4712 return cc;
4713
4714 case OP_DOLLM:
/* jump[1]: subject data remains, so a newline must follow. At the end of
the subject only noteol is checked and jump[0] skips the newline test. */
4715 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4716 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4717 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4718 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4719 check_partial(common, FALSE);
4720 jump[0] = JUMP(SLJIT_JUMP);
4721 JUMPHERE(jump[1]);
4722
4723 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4724 {
/* Two-unit fixed newline: TMP2 = STR_PTR + 2 units, TMP1 = current unit. */
4725 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4726 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4727 if (common->mode == JIT_COMPILE)
4728 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4729 else
4730 {
/* Partial modes. jump[1]: at least two units remain. Otherwise only one
unit is left: fail if it is not the first newline half, else go through
check_partial() and then fail. */
4731 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4732 /* STR_PTR = STR_END - IN_UCHARS(1) */
4733 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4734 check_partial(common, TRUE);
4735 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4736 JUMPHERE(jump[1]);
4737 }
4738
4739 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4740 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4741 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4742 }
4743 else
4744 {
4745 peek_char(common);
4746 check_newlinechar(common, common->nltype, backtracks, FALSE);
4747 }
4748 JUMPHERE(jump[0]);
4749 return cc;
4750
4751 case OP_CHAR:
4752 case OP_CHARI:
/* length counts the code units of the literal character at cc. */
4753 length = 1;
4754 #ifdef SUPPORT_UTF
4755 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4756 #endif
/* In JIT_COMPILE mode, when the character is caseful, has no other case,
or char_get_othercase_bit() returns non-zero, advance STR_PTR first, check
it against STR_END once and let byte_sequence_compare() emit the
comparison. */
4757 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4758 {
4759 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4760 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4761
4762 context.length = IN_UCHARS(length);
4763 context.sourcereg = -1;
4764 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4765 context.ucharptr = 0;
4766 #endif
4767 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4768 }
/* Otherwise read one character into TMP1 and compare it with c. */
4769 detect_partial_match(common, backtracks);
4770 read_char(common);
4771 #ifdef SUPPORT_UTF
4772 if (common->utf)
4773 {
4774 GETCHAR(c, cc);
4775 }
4776 else
4777 #endif
4778 c = *cc;
4779 if (type == OP_CHAR || !char_has_othercase(common, cc))
4780 {
4781 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4782 return cc + length;
4783 }
4784 oc = char_othercase(common, c);
4785 bit = c ^ oc;
/* When the two cases differ in a single bit, force that bit on and do one
comparison. */
4786 if (is_powerof2(bit))
4787 {
4788 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4789 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4790 return cc + length;
4791 }
/* General case: TMP2 = (TMP1 == c) | (TMP1 == oc); fail when it is zero. */
4792 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4793 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4794 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4795 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4796 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4797 return cc + length;
4798
4799 case OP_NOT:
4800 case OP_NOTI:
4801 detect_partial_match(common, backtracks);
4802 length = 1;
4803 #ifdef SUPPORT_UTF
4804 if (common->utf)
4805 {
4806 #ifdef COMPILE_PCRE8
4807 c = *cc;
4808 if (c < 128)
4809 {
/* The excluded character is below 128, so only the first subject byte has
to be compared; the rest of the subject character is skipped afterwards
without being decoded. */
4810 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4811 if (type == OP_NOT || !char_has_othercase(common, cc))
4812 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4813 else
4814 {
4815 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4816 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4817 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4818 }
4819 /* Skip the variable-length character. */
4820 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4821 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4822 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4823 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4824 JUMPHERE(jump[0]);
4825 return cc + 1;
4826 }
4827 else
4828 #endif /* COMPILE_PCRE8 */
4829 {
4830 GETCHARLEN(c, cc, length);
4831 read_char(common);
4832 }
4833 }
4834 else
4835 #endif /* SUPPORT_UTF */
4836 {
4837 read_char(common);
4838 c = *cc;
4839 }
4840
/* TMP1 holds the subject character: fail when it equals c or, for caseless
matching, its other case. */
4841 if (type == OP_NOT || !char_has_othercase(common, cc))
4842 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4843 else
4844 {
4845 oc = char_othercase(common, c);
4846 bit = c ^ oc;
4847 if (is_powerof2(bit))
4848 {
4849 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4850 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4851 }
4852 else
4853 {
4854 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4855 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4856 }
4857 }
4858 return cc + length;
4859
4860 case OP_CLASS:
4861 case OP_NCLASS:
4862 detect_partial_match(common, backtracks);
4863 read_char(common);
/* If check_class_ranges() returns true nothing more is emitted here; in
both cases the 32 byte bit map at cc is skipped. */
4864 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4865 return cc + 32 / sizeof(pcre_uchar);
4866
4867 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4868 jump[0] = NULL;
4869 #ifdef COMPILE_PCRE8
4870 /* This check only affects 8 bit mode. In other modes, we
4871 always need to compare the value with 255. */
4872 if (common->utf)
4873 #endif /* COMPILE_PCRE8 */
4874 {
/* Characters above 255 are not looked up in the bit map: OP_CLASS fails
for them, OP_NCLASS skips the bit test through jump[0]. */
4875 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4876 if (type == OP_CLASS)
4877 {
4878 add_jump(compiler, backtracks, jump[0]);
4879 jump[0] = NULL;
4880 }
4881 }
4882 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
/* Bit map test: the byte at cc + (c >> 3) is ANDed with 1 << (c & 7); fail
when the bit is clear. */
4883 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4884 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4885 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4886 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4887 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4888 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4889 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4890 if (jump[0] != NULL)
4891 JUMPHERE(jump[0]);
4892 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4893 return cc + 32 / sizeof(pcre_uchar);
4894
4895 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4896 case OP_XCLASS:
/* The class data after the LINK_SIZE field is compiled by the xclass
compiler; the value read by GET(cc, 0) is used to step over the item.
NOTE(review): presumably that value is the item length counted from the
opcode, hence the - 1 - confirm. */
4897 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4898 return cc + GET(cc, 0) - 1;
4899 #endif
4900
4901 case OP_REVERSE:
/* Moves STR_PTR backwards by the count stored at cc; a zero count emits no
code. */
4902 length = GET(cc, 0);
4903 if (length == 0)
4904 return cc + LINK_SIZE;
4905 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4906 #ifdef SUPPORT_UTF
4907 if (common->utf)
4908 {
/* UTF: step back one character at a time, length times (TMP2 is the
counter), failing if STR_PTR is at or before begin before a step. */
4909 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4910 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4911 label = LABEL();
4912 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4913 skip_char_back(common);
4914 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4915 JUMPTO(SLJIT_C_NOT_ZERO, label);
4916 }
4917 else
4918 #endif
4919 {
/* Non-UTF: subtract length units at once and fail if STR_PTR ends up before
begin. */
4920 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4921 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4922 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4923 }
4924 check_start_used_ptr(common);
4925 return cc + LINK_SIZE;
4926 }
/* Reached only for an opcode that the switch above does not handle. */
4927 SLJIT_ASSERT_STOP();
4928 return cc;
4929 }
4930
4931 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4932 {
4933 /* This function consumes at least one input character. */
4934 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4935 DEFINE_COMPILER;
4936 pcre_uchar *ccbegin = cc;
4937 compare_context context;
4938 int size;
4939
4940 context.length = 0;
4941 do
4942 {
4943 if (cc >= ccend)
4944 break;
4945
4946 if (*cc == OP_CHAR)
4947 {
4948 size = 1;
4949 #ifdef SUPPORT_UTF
4950 if (common->utf && HAS_EXTRALEN(cc[1]))
4951 size += GET_EXTRALEN(cc[1]);
4952 #endif
4953 }
4954 else if (*cc == OP_CHARI)
4955 {
4956 size = 1;
4957 #ifdef SUPPORT_UTF
4958 if (common->utf)
4959 {
4960 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4961 size = 0;
4962 else if (HAS_EXTRALEN(cc[1]))
4963 size += GET_EXTRALEN(cc[1]);
4964 }
4965 else
4966 #endif
4967 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4968 size = 0;
4969 }
4970 else
4971 size = 0;
4972
4973 cc += 1 + size;
4974 context.length += IN_UCHARS(size);
4975 }
4976 while (size > 0 && context.length <= 128);
4977
4978 cc = ccbegin;
4979 if (context.length > 0)
4980 {
4981 /* We have a fixed-length byte sequence. */
4982 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4983 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4984
4985 context.sourcereg = -1;
4986 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4987 context.ucharptr = 0;
4988 #endif
4989 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4990 return cc;
4991 }
4992
4993 /* A non-fixed length character will be checked if length == 0. */
4994 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
4995 }
4996
4997 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4998 {
4999 DEFINE_COMPILER;
5000 int offset = GET2(cc, 1) << 1;
5001
5002 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5003 if (!common->jscript_compat)
5004 {
5005 if (backtracks == NULL)
5006 {
5007 /* OVECTOR(1) contains the "string begin - 1" constant. */
5008 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5009 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5010 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5011 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5012 return JUMP(SLJIT_C_NOT_ZERO);
5013 }
5014 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5015 }
5016 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5017 }
5018
5019 /* Forward declarations: the opcode compilers below call these two functions before their definitions appear. */
5020 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5021 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5022
/* Allocates a zero-filled backtrack record of `size` bytes with
sljit_alloc_memory(), stores `ccstart` in it and pushes it onto parent->top.
If the compiler is in an error state after the allocation, the enclosing
function returns `error`. The macro relies on the local names `compiler`,
`backtrack` and `parent` being in scope at the point of use. */
5023 #define PUSH_BACKTRACK(size, ccstart, error) \
5024 do \
5025 { \
5026 backtrack = sljit_alloc_memory(compiler, (size)); \
5027 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5028 return error; \
5029 memset(backtrack, 0, size); \
5030 backtrack->prev = parent->top; \
5031 backtrack->cc = (ccstart); \
5032 parent->top = backtrack; \
5033 } \
5034 while (0)
5035
/* Same steps as PUSH_BACKTRACK, for use inside functions returning void: on
a compiler error the enclosing function simply returns. It relies on the
local names `compiler`, `backtrack` and `parent` being in scope. */
5036 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
5037 do \
5038 { \
5039 backtrack = sljit_alloc_memory(compiler, (size)); \
5040 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5041 return; \
5042 memset(backtrack, 0, size); \
5043 backtrack->prev = parent->top; \
5044 backtrack->cc = (ccstart); \
5045 parent->top = backtrack; \
5046 } \
5047 while (0)
5048
/* Views the local `backtrack` pointer as a pointer to the given backtrack record type. */
5049 #define BACKTRACK_AS(type) ((type *)backtrack)
5050
5051 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5052 {
5053 DEFINE_COMPILER;
5054 int offset = GET2(cc, 1) << 1;
5055 struct sljit_jump *jump = NULL;
5056 struct sljit_jump *partial;
5057 struct sljit_jump *nopartial;
5058
5059 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5060 /* OVECTOR(1) contains the "string begin - 1" constant. */
5061 if (withchecks && !common->jscript_compat)
5062 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5063
5064 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5065 if (common->utf && *cc == OP_REFI)
5066 {
5067 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5068 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5069 if (withchecks)
5070 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5071
5072 /* Needed to save important temporary registers. */
5073 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5074 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5075 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5076 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5077 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5078 if (common->mode == JIT_COMPILE)
5079 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5080 else
5081 {
5082 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5083 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5084 check_partial(common, FALSE);
5085 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5086 JUMPHERE(nopartial);
5087 }
5088 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5089 }
5090 else
5091 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5092 {
5093 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5094 if (withchecks)
5095 jump = JUMP(SLJIT_C_ZERO);
5096
5097 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5098 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5099 if (common->mode == JIT_COMPILE)
5100 add_jump(compiler, backtracks, partial);
5101
5102 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5103 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5104
5105 if (common->mode != JIT_COMPILE)
5106 {
5107 nopartial = JUMP(SLJIT_JUMP);
5108 JUMPHERE(partial);
5109 /* TMP2 -= STR_END - STR_PTR */
5110 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5111 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5112 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5113 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5114 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5115 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5116 JUMPHERE(partial);
5117 check_partial(common, FALSE);
5118 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5119 JUMPHERE(nopartial);
5120 }
5121 }
5122
5123 if (jump != NULL)
5124 {
5125 if (emptyfail)
5126 add_jump(compiler, backtracks, jump);
5127 else
5128 JUMPHERE(jump);
5129 }
5130 return cc + 1 + IMM2_SIZE;
5131 }
5132
5133 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5134 {
5135 DEFINE_COMPILER;
5136 backtrack_common *backtrack;
5137 pcre_uchar type;
5138 struct sljit_label *label;
5139 struct sljit_jump *zerolength;
5140 struct sljit_jump *jump = NULL;
5141 pcre_uchar *ccbegin = cc;
5142 int min = 0, max = 0;
5143 BOOL minimize;
5144
5145 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5146
5147 type = cc[1 + IMM2_SIZE];
5148 minimize = (type & 0x1) != 0;
5149 switch(type)
5150 {
5151 case OP_CRSTAR:
5152 case OP_CRMINSTAR:
5153 min = 0;
5154 max = 0;
5155 cc += 1 + IMM2_SIZE + 1;
5156 break;
5157 case OP_CRPLUS:
5158 case OP_CRMINPLUS:
5159 min = 1;
5160 max = 0;
5161 cc += 1 + IMM2_SIZE + 1;
5162 break;
5163 case OP_CRQUERY:
5164 case OP_CRMINQUERY:
5165 min = 0;
5166 max = 1;
5167 cc += 1 + IMM2_SIZE + 1;
5168 break;
5169 case OP_CRRANGE:
5170 case OP_CRMINRANGE:
5171 min = GET2(cc, 1 + IMM2_SIZE + 1);
5172 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5173 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5174 break;
5175 default:
5176 SLJIT_ASSERT_STOP();
5177 break;
5178 }
5179
5180 if (!minimize)
5181 {
5182 if (min == 0)
5183 {
5184 allocate_stack(common, 2);
5185 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5186 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5187 /* Temporary release of STR_PTR. */
5188 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5189 zerolength = compile_ref_checks(common, ccbegin, NULL);
5190 /* Restore if not zero length. */
5191 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5192 }
5193 else
5194 {
5195 allocate_stack(common, 1);
5196 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5197 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5198 }
5199
5200 if (min > 1 || max > 1)
5201 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5202
5203 label = LABEL();
5204 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5205
5206 if (min > 1 || max > 1)
5207 {
5208 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5209 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5210 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5211 if (min > 1)
5212 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5213 if (max > 1)
5214 {
5215 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5216 allocate_stack(common, 1);
5217 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5218 JUMPTO(SLJIT_JUMP, label);
5219 JUMPHERE(jump);
5220 }
5221 }
5222
5223 if (max == 0)
5224 {
5225 /* Includes min > 1 case as well. */
5226 allocate_stack(common, 1);
5227 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5228 JUMPTO(SLJIT_JUMP, label);
5229 }
5230
5231 JUMPHERE(zerolength);
5232 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5233
5234 decrease_call_count(common);
5235 return cc;
5236 }
5237
5238 allocate_stack(common, 2);
5239 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5240 if (type != OP_CRMINSTAR)
5241 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5242
5243 if (min == 0)
5244 {
5245 zerolength = compile_ref_checks(common, ccbegin, NULL);
5246 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5247 jump = JUMP(SLJIT_JUMP);
5248 }
5249 else
5250 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5251
5252 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5253 if (max > 0)
5254 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5255
5256 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5257 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5258
5259 if (min > 1)
5260 {
5261 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5262 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5263 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5264 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5265 }
5266 else if (max > 0)
5267 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5268
5269 if (jump != NULL)
5270 JUMPHERE(jump);
5271 JUMPHERE(zerolength);
5272
5273 decrease_call_count(common);
5274 return cc;
5275 }
5276
5277 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5278 {
5279 DEFINE_COMPILER;
5280 backtrack_common *backtrack;
5281 recurse_entry *entry = common->entries;
5282 recurse_entry *prev = NULL;
5283 int start = GET(cc, 1);
5284 pcre_uchar *start_cc;
5285 BOOL needs_control_head;
5286
5287 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5288
5289 /* Inlining simple patterns. */
5290 if (get_framesize(common, common->start + start, TRUE, &needs_control_head) == no_stack)
5291 {
5292 start_cc = common->start + start;
5293 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5294 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5295 return cc + 1 + LINK_SIZE;
5296 }
5297
5298 while (entry != NULL)
5299 {
5300 if (entry->start == start)
5301 break;
5302 prev = entry;
5303 entry = entry->next;
5304 }
5305
5306 if (entry == NULL)
5307 {
5308 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5309 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5310 return NULL;
5311 entry->next = NULL;
5312 entry->entry = NULL;
5313 entry->calls = NULL;
5314 entry->start = start;
5315
5316 if (prev != NULL)
5317 prev->next = entry;
5318 else
5319 common->entries = entry;
5320 }
5321
5322 if (common->has_set_som && common->mark_ptr != 0)
5323 {
5324 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5325 allocate_stack(common, 2);
5326 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5327 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5328 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5329 }
5330 else if (common->has_set_som || common->mark_ptr != 0)
5331 {
5332 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5333 allocate_stack(common, 1);
5334 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5335 }
5336
5337 if (entry->entry == NULL)
5338 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5339 else
5340 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5341 /* Leave if the match is failed. */
5342 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5343 return cc + 1 + LINK_SIZE;
5344 }
5345
5346 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5347 {
5348 const pcre_uchar *begin = arguments->begin;
5349 int *offset_vector = arguments->offsets;
5350 int offset_count = arguments->offset_count;
5351 int i;
5352
5353 if (PUBL(callout) == NULL)
5354 return 0;
5355
5356 callout_block->version = 2;
5357 callout_block->callout_data = arguments->callout_data;
5358
5359 /* Offsets in subject. */
5360 callout_block->subject_length = arguments->end - arguments->begin;
5361 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5362 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5363 #if defined COMPILE_PCRE8
5364 callout_block->subject = (PCRE_SPTR)begin;
5365 #elif defined COMPILE_PCRE16
5366 callout_block->subject = (PCRE_SPTR16)begin;
5367 #elif defined COMPILE_PCRE32
5368 callout_block->subject = (PCRE_SPTR32)begin;
5369 #endif
5370
5371 /* Convert and copy the JIT offset vector to the offset_vector array. */
5372 callout_block->capture_top = 0;
5373 callout_block->offset_vector = offset_vector;
5374 for (i = 2; i < offset_count; i += 2)
5375 {
5376 offset_vector[i] = jit_ovector[i] - begin;
5377 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5378 if (jit_ovector[i] >= begin)
5379 callout_block->capture_top = i;
5380 }
5381
5382 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5383 if (offset_count > 0)
5384 offset_vector[0] = -1;
5385 if (offset_count > 1)
5386 offset_vector[1] = -1;
5387 return (*PUBL(callout))(callout_block);
5388 }
5389
5390 /* Size of the callout block rounded up to a multiple of 8 bytes. */
5391 #define CALLOUT_ARG_SIZE \
5392 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5393
/* Offset of a callout block field relative to STACK_TOP; the block starts
CALLOUT_ARG_SIZE bytes below it, so the value is negative. */
5394 #define CALLOUT_ARG_OFFSET(arg) \
5395 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5396
/* Compiles a callout item: reserves room for a callout block on the JIT
stack, fills in the fields known at this point, and calls do_callout() to
complete the block and run the user callout. A positive result backtracks, a
negative result leaves through the forced quit path, and zero continues the
match. Returns the address after the callout item, or NULL if the backtrack
record cannot be pushed. */
5397 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5398 {
5399 DEFINE_COMPILER;
5400 backtrack_common *backtrack;
5401
5402 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5403
/* The stack is allocated in sljit_sw sized slots. */
5404 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5405
5406 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5407 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5408 SLJIT_ASSERT(common->capture_last_ptr != 0);
5409 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5410 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5411
5412 /* These pointer sized fields temporarily store internal variables: the
current subject position goes into offset_vector and OVECTOR(0) into subject.
do_callout() reads both before overwriting them with their real values. */
5413 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5414 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5415 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5416
/* The mark field receives the mark_ptr member of jit_arguments, or 0 when
the pattern has no mark pointer. */
5417 if (common->mark_ptr != 0)
5418 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5419 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5420 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5421 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5422
5423 /* Needed to save important temporary registers. */
5424 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
/* Second argument: address of the callout block. Third argument: address of
the JIT offset vector. NOTE(review): the first argument is presumably TMP1,
which was loaded with ARGUMENTS above - confirm the register mapping. */
5425 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5426 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5427 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
5428 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5429 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5430 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5431
5432 /* Check return value: greater than zero backtracks, less than zero quits. */
5433 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5434 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
5435 if (common->forced_quit_label == NULL)
5436 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5437 else
5438 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5439 return cc + 2 + 2 * LINK_SIZE;
5440 }
5441
/* The callout block layout macros are not needed past this point. */
5442 #undef CALLOUT_ARG_SIZE
5443 #undef CALLOUT_ARG_OFFSET
5444
/* Compiles an assertion group (an opcode in the range OP_ASSERT ..
OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO or OP_BRAMINZERO.
Each alternative is compiled with compile_matchingpath() immediately followed
by its own compile_backtrackingpath(). When conditional is set, a failed
assertion jumps to backtrack->condfailed instead of
backtrack->common.topbacktracks. The accept / quit state of common is
redirected while the assertion is compiled and restored before returning.
Returns cc + 1 + LINK_SIZE, where cc points at the opcode that ends the last
alternative, or NULL when the sljit compiler reports an error. */
5445 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5446 {
5447 DEFINE_COMPILER;
5448 int framesize;
5449 int extrasize;
5450 BOOL needs_control_head;
5451 int private_data_ptr;
5452 backtrack_common altbacktrack;
5453 pcre_uchar *ccbegin;
5454 pcre_uchar opcode;
5455 pcre_uchar bra = OP_BRA;
5456 jump_list *tmp = NULL;
5457 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5458 jump_list **found;
5459 /* Saving previous accept variables. */
5460 struct sljit_label *save_quit_label = common->quit_label;
5461 struct sljit_label *save_accept_label = common->accept_label;
5462 jump_list *save_quit = common->quit;
5463 jump_list *save_accept = common->accept;
5464 BOOL save_local_exit = common->local_exit;
5465 struct sljit_jump *jump;
5466 struct sljit_jump *brajump = NULL;
5467
/* Remember and step over an optional zero-repeat prefix. */
5468 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5469 {
5470 SLJIT_ASSERT(!conditional);
5471 bra = *cc;
5472 cc++;
5473 }
5474 private_data_ptr = PRIVATE_DATA(cc);
5475 SLJIT_ASSERT(private_data_ptr != 0);
5476 framesize = get_framesize(common, cc, FALSE, &needs_control_head);
5477 backtrack->framesize = framesize;
5478 backtrack->private_data_ptr = private_data_ptr;
5479 opcode = *cc;
5480 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* The jump emitted after a matching alternative is collected in the local
tmp list for positive assertions; for negative assertions a matching
alternative means failure, so the jump goes straight to target. */
5481 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
5482 ccbegin = cc;
5483 cc += GET(cc, 1);
5484
5485 if (bra == OP_BRAMINZERO)
5486 {
5487 /* This is a braminzero backtrack path. */
5488 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5489 free_stack(common, 1);
5490 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5491 }
5492
/* Save STR_PTR (and the control head when needed) on the stack. With a
non-negative framesize the previous private data value is saved as well and
a frame is built with init_frame(). */
5493 if (framesize < 0)
5494 {
5495 extrasize = needs_control_head ? 2 : 1;
5496 if (framesize == no_frame)
5497 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5498 allocate_stack(common, extrasize);
5499 if (needs_control_head)
5500 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5501 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5502 if (needs_control_head)
5503 {
5504 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5505 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5506 }
5507 }
5508 else
5509 {
5510 extrasize = needs_control_head ? 3 : 2;
5511 allocate_stack(common, framesize + extrasize);
5512 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5513 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5514 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5515 if (needs_control_head)
5516 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5517 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5518 if (needs_control_head)
5519 {
5520 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5521 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5522 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5523 }
5524 else
5525 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5526 init_frame(common, ccbegin, framesize + extrasize - 1, extrasize, FALSE);
5527 }
5528
/* Quit and accept jumps raised while compiling the assertion body are
collected in fresh lists; the saved values are put back on every exit. */
5529 memset(&altbacktrack, 0, sizeof(backtrack_common));
5530 common->local_exit = TRUE;
5531 common->quit_label = NULL;
5532 common->quit = NULL;
/* One iteration per alternative. */
5533 while (1)
5534 {
5535 common->accept_label = NULL;
5536 common->accept = NULL;
5537 altbacktrack.top = NULL;
5538 altbacktrack.topbacktracks = NULL;
5539
/* Second and later alternatives restart from the saved subject position. */
5540 if (*ccbegin == OP_ALT)
5541 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5542
5543 altbacktrack.cc = ccbegin;
5544 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5545 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5546 {
5547 common->local_exit = save_local_exit;
5548 common->quit_label = save_quit_label;
5549 common->accept_label = save_accept_label;
5550 common->quit = save_quit;
5551 common->accept = save_accept;
5552 return NULL;
5553 }
5554 common->accept_label = LABEL();
5555 if (common->accept != NULL)
5556 set_jumps(common->accept, common->accept_label);
5557
5558 /* Reset stack. */
5559 if (framesize < 0)
5560 {
5561 if (framesize == no_frame)
5562 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5563 else
5564 free_stack(common, extrasize);
5565 if (needs_control_head)
5566 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5567 }
5568 else
5569 {
5570 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5571 {
5572 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5573 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5574 if (needs_control_head)
5575 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5576 }
5577 else
5578 {
5579 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5580 if (needs_control_head)
5581 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5582 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5583 }
5584 }
5585
5586 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5587 {
5588 /* We know that STR_PTR was stored on the top of the stack. */
5589 if (conditional)
5590 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
5591 else if (bra == OP_BRAZERO)
5592 {
5593 if (framesize < 0)
5594 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5595 else
5596 {
5597 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5598 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
5599 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5600 }
5601 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5602 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5603 }
5604 else if (framesize >= 0)
5605 {
5606 /* For OP_BRA and OP_BRAMINZERO. */
5607 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5608 }
5609 }
/* This alternative matched: leave through the "found" list. */
5610 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5611
/* The backtracking path of the alternative is emitted right here; its
remaining failures continue at the label placed after it. */
5612 compile_backtrackingpath(common, altbacktrack.top);
5613 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5614 {
5615 common->local_exit = save_local_exit;
5616 common->quit_label = save_quit_label;
5617 common->accept_label = save_accept_label;
5618 common->quit = save_quit;
5619 common->accept = save_accept;
5620 return NULL;
5621 }
5622 set_jumps(altbacktrack.topbacktracks, LABEL());
5623
5624 if (*cc != OP_ALT)
5625 break;
5626
5627 ccbegin = cc;
5628 cc += GET(cc, 1);
5629 }
5630
5631 /* None of them matched. */
5632 if (common->quit != NULL)
5633 {
/* Quit jumps collected inside the assertion land here and restore the
stack; the normal fall-through path jumps over this code. */
5634 jump = JUMP(SLJIT_JUMP);
5635 set_jumps(common->quit, LABEL());
5636 SLJIT_ASSERT(framesize != no_stack);
5637 if (framesize < 0)
5638 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
5639 else
5640 {
5641 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5642 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5643 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5644 }
5645 JUMPHERE(jump);
5646 }
5647
/* Restore the control head saved in STACK(1). */
5648 if (needs_control_head)
5649 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
5650
5651 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5652 {
5653 /* Assert is failed. */
5654 if (conditional || bra == OP_BRAZERO)
5655 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5656
5657 if (framesize < 0)
5658 {
5659 /* The topmost item should be 0. */
5660 if (bra == OP_BRAZERO)
5661 {
5662 if (extrasize == 2)
5663 free_stack(common, 1);
5664 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5665 }
5666 else
5667 free_stack(common, extrasize);
5668 }
5669 else
5670 {
5671 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5672 /* The topmost item should be 0. */
5673 if (bra == OP_BRAZERO)
5674 {
5675 free_stack(common, framesize + extrasize - 1);
5676 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5677 }
5678 else
5679 free_stack(common, framesize + extrasize);
5680 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5681 }
/* With OP_BRAZERO the failure jump is bound to the matching path label
further below; otherwise it is added to the failure target list. */
5682 jump = JUMP(SLJIT_JUMP);
5683 if (bra != OP_BRAZERO)
5684 add_jump(compiler, target, jump);
5685
5686 /* Assert is successful. */
5687 set_jumps(tmp, LABEL());
5688 if (framesize < 0)
5689 {
5690 /* We know that STR_PTR was stored on the top of the stack. */
5691 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5692 /* Keep the STR_PTR on the top of the stack. */
5693 if (bra == OP_BRAZERO)
5694 {
5695 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5696 if (extrasize == 2)
5697 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5698 }
5699 else if (bra == OP_BRAMINZERO)
5700 {
5701 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5702 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5703 }
5704 }
5705 else
5706 {
5707 if (bra == OP_BRA)
5708 {
5709 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5710 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5711 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
5712 }
5713 else
5714 {
5715 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5716 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5717 if (extrasize == 2)
5718 {
5719 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5720 if (bra == OP_BRAMINZERO)
5721 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5722 }
5723 else
5724 {
5725 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5726 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5727 }
5728 }
5729 }
5730
5731 if (bra == OP_BRAZERO)
5732 {
5733 backtrack->matchingpath = LABEL();
5734 SET_LABEL(jump, backtrack->matchingpath);
5735 }
5736 else if (bra == OP_BRAMINZERO)
5737 {
5738 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5739 JUMPHERE(brajump);
5740 if (framesize >= 0)
5741 {
5742 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5743 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5744 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5745 }
5746 set_jumps(backtrack->common.topbacktracks, LABEL());
5747 }
5748 }
5749 else
5750 {
5751 /* AssertNot is successful. */
5752 if (framesize < 0)
5753 {
5754 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5755 if (bra != OP_BRA)
5756 {
5757 if (extrasize == 2)
5758 free_stack(common, 1);
5759 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5760 }
5761 else
5762 free_stack(common, extrasize);
5763 }
5764 else
5765 {
5766 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5767 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5768 /* The topmost item should be 0. */
5769 if (bra != OP_BRA)
5770 {
5771 free_stack(common, framesize + extrasize - 1);
5772 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5773 }
5774 else
5775 free_stack(common, framesize + extrasize);
5776 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5777 }
5778
5779 if (bra == OP_BRAZERO)
5780 backtrack->matchingpath = LABEL();
5781 else if (bra == OP_BRAMINZERO)
5782 {
5783 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5784 JUMPHERE(brajump);
5785 }
5786
/* With a zero-repeat prefix the collected failure jumps are resolved here,
so the list handed back to the caller is empty. */
5787 if (bra != OP_BRA)
5788 {
5789 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5790 set_jumps(backtrack->common.topbacktracks, LABEL());
5791 backtrack->common.topbacktracks = NULL;
5792 }
5793 }
5794
/* Put back the accept / quit state of the enclosing context. */
5795 common->local_exit = save_local_exit;
5796 common->quit_label = save_quit_label;
5797 common->accept_label = save_accept_label;
5798 common->quit = save_quit;
5799 common->accept = save_accept;
5800 return cc + 1 + LINK_SIZE;
5801 }
5802
5803 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
5804 {
5805 int condition = FALSE;
5806 pcre_uchar *slotA = name_table;
5807 pcre_uchar *slotB;
5808 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5809 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5810 sljit_sw no_capture;
5811 int i;
5812
5813 locals += refno & 0xff;
5814 refno >>= 8;
5815 no_capture = locals[1];
5816
5817 for (i = 0; i < name_count; i++)
5818 {
5819 if (GET2(slotA, 0) == refno) break;
5820 slotA += name_entry_size;
5821 }
5822
5823 if (i < name_count)
5824 {
5825 /* Found a name for the number - there can be only one; duplicate names
5826 for different numbers are allowed, but not vice versa. First scan down
5827 for duplicates. */
5828
5829 slotB = slotA;
5830 while (slotB > name_table)
5831 {
5832 slotB -= name_entry_size;
5833 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5834 {
5835 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5836 if (condition) break;
5837 }
5838 else break;
5839 }
5840
5841 /* Scan up for duplicates */
5842 if (!condition)
5843 {
5844 slotB = slotA;
5845 for (i++; i < name_count; i++)
5846 {
5847 slotB += name_entry_size;
5848 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5849 {
5850 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5851 if (condition) break;
5852 }
5853 else break;
5854 }
5855 }
5856 }
5857 return condition;
5858 }
5859
5860 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5861 {
5862 int condition = FALSE;
5863 pcre_uchar *slotA = name_table;
5864 pcre_uchar *slotB;
5865 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5866 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5867 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
5868 sljit_uw i;
5869
5870 for (i = 0; i < name_count; i++)
5871 {
5872 if (GET2(slotA, 0) == recno) break;
5873 slotA += name_entry_size;
5874 }
5875
5876 if (i < name_count)
5877 {
5878 /* Found a name for the number - there can be only one; duplicate
5879 names for different numbers are allowed, but not vice versa. First
5880 scan down for duplicates. */
5881
5882 slotB = slotA;
5883 while (slotB > name_table)
5884 {
5885 slotB -= name_entry_size;
5886 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5887 {
5888 condition = GET2(slotB, 0) == group_num;
5889 if (condition) break;
5890 }
5891 else break;
5892 }
5893
5894 /* Scan up for duplicates */
5895 if (!condition)
5896 {
5897 slotB = slotA;
5898 for (i++; i < name_count; i++)
5899 {
5900 slotB += name_entry_size;
5901 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5902 {
5903 condition = GET2(slotB, 0) == group_num;
5904 if (condition) break;
5905 }
5906 else break;
5907 }
5908 }
5909 }
5910 return condition;
5911 }
5912
5913 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
5914 {
5915 DEFINE_COMPILER;
5916 int stacksize;
5917
5918 if (framesize < 0)
5919 {
5920 if (framesize == no_frame)
5921 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5922 else
5923 {
5924 stacksize = needs_control_head ? 1 : 0;
5925 if (ket != OP_KET || has_alternatives)
5926 stacksize++;
5927 free_stack(common, stacksize);
5928 }
5929
5930 if (needs_control_head)
5931 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
5932
5933 /* TMP2 which is set here used by OP_KETRMAX below. */
5934 if (ket == OP_KETRMAX)
5935 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5936 else if (ket == OP_KETRMIN)
5937 {
5938 /* Move the STR_PTR to the private_data_ptr. */
5939 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
5940 }
5941 }
5942 else
5943 {
5944 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
5945 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
5946 if (needs_control_head)
5947 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
5948
5949 if (ket == OP_KETRMAX)
5950 {
5951 /* TMP2 which is set here used by OP_KETRMAX below. */
5952 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5953 }
5954 }
5955 if (needs_control_head)
5956 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
5957 }
5958
5959 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
5960 {
5961 DEFINE_COMPILER;
5962
5963 if (common->capture_last_ptr != 0)
5964 {
5965 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5966 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
5967 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
5968 stacksize++;
5969 }
5970 if (common->optimized_cbracket[offset >> 1] == 0)
5971 {
5972 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5973 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5974 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
5975 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5976 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
5977 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5978 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5979 stacksize += 2;
5980 }
5981 return stacksize;
5982 }
5983
5984 /*
5985 Handling bracketed expressions is probably the most complex part.
5986
5987 Stack layout naming characters:
5988 S - Push the current STR_PTR
5989 0 - Push a 0 (NULL)
5990 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5991 before the next alternative. Not pushed if there are no alternatives.
5992 M - Any values pushed by the current alternative. Can be empty, or anything.
5993 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5994 L - Push the previous local (pointed by localptr) to the stack
5995 () - optional values stored on the stack
5996 ()* - optional, can be stored multiple times
5997
5998 The following list shows the regular expression templates, their PCRE byte codes
5999 and stack layout supported by pcre-sljit.
6000
6001 (?:) OP_BRA | OP_KET A M
6002 () OP_CBRA | OP_KET C M
6003 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6004 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6005 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6006 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6007 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6008 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6009 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6010 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6011 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6012 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6013 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6014 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6015 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6016 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6017 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6018 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6019 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6020 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6021 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6022 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6023
6024
6025 Stack layout naming characters:
6026 A - Push the alternative index (starting from 0) on the stack.
6027 Not pushed if there are no alternatives.
6028 M - Any values pushed by the current alternative. Can be empty, or anything.
6029
6030 The next list shows the possible content of a bracket:
6031 (|) OP_*BRA | OP_ALT ... M A
6032 (?()|) OP_*COND | OP_ALT M A
6033 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6034 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6035 Or nothing, if trace is unnecessary
6036 */
6037
/* Generates the matching path of one bracketed group. cc points to the
bracket opcode, or to an OP_BRAZERO / OP_BRAMINZERO which precedes it.

Returns the address following the closing ket of the group (past all of its
alternatives), or NULL when the sljit compiler reports an error. A conditional
group whose condition is OP_DEF generates no code: its backtrack entry is
unlinked from parent and bracketend(cc) is returned.

The backtrack variable is never assigned explicitly in this function; it is
presumably set up by the PUSH_BACKTRACK macro - TODO confirm. */
6038 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6039 {
6040 DEFINE_COMPILER;
6041 backtrack_common *backtrack;
6042 pcre_uchar opcode;
6043 int private_data_ptr = 0;
6044 int offset = 0;
6045 int stacksize;
6046 pcre_uchar *ccbegin;
6047 pcre_uchar *matchingpath;
6048 pcre_uchar bra = OP_BRA;
6049 pcre_uchar ket;
6050 assert_backtrack *assert;
6051 BOOL has_alternatives;
6052 BOOL needs_control_head = FALSE;
6053 struct sljit_jump *jump;
6054 struct sljit_jump *skip;
6055 struct sljit_label *rmaxlabel = NULL;
6056 struct sljit_jump *braminzerojump = NULL;
6057
6058 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6059
/* Remember a leading OP_BRAZERO / OP_BRAMINZERO in bra and step over it. */
6060 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6061 {
6062 bra = *cc;
6063 cc++;
/* NOTE(review): the same store is repeated unconditionally right after
this block, so this one looks redundant. */
6064 opcode = *cc;
6065 }
6066
6067 opcode = *cc;
6068 ccbegin = cc;
6069 matchingpath = ccbegin + 1 + LINK_SIZE;
6070
6071 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6072 {
6073 /* Drop this bracket_backtrack. */
6074 parent->top = backtrack->prev;
6075 return bracketend(cc);
6076 }
6077
/* The opcode which closes the whole group decides the repeat type. */
6078 ket = *(bracketend(cc) - 1 - LINK_SIZE);
6079 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6080 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
/* From here cc points to the end of the first alternative. */
6081 cc += GET(cc, 1);
6082
6083 has_alternatives = *cc == OP_ALT;
6084 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6085 {
6086 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
6087 if (*matchingpath == OP_NRREF)
6088 {
/* stacksize is only used as a scratch variable for the referenced number. */
6089 stacksize = GET2(matchingpath, 1);
6090 if (common->currententry == NULL || stacksize == RREF_ANY)
6091 has_alternatives = FALSE;
6092 else if (common->currententry->start == 0)
6093 has_alternatives = stacksize != 0;
6094 else
6095 has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6096 }
6097 }
6098
/* A repeated OP_COND is handled as OP_SCOND, and OP_ONCE_NC as OP_ONCE. */
6099 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6100 opcode = OP_SCOND;
6101 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6102 opcode = OP_ONCE;
6103
6104 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6105 {
6106 /* Capturing brackets have a pre-allocated space. */
6107 offset = GET2(ccbegin, 1 + LINK_SIZE);
6108 if (common->optimized_cbracket[offset] == 0)
6109 {
6110 private_data_ptr = OVECTOR_PRIV(offset);
6111 offset <<= 1;
6112 }
6113 else
6114 {
6115 offset <<= 1;
6116 private_data_ptr = OVECTOR(offset);
6117 }
6118 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6119 matchingpath += IMM2_SIZE;
6120 }
6121 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6122 {
6123 /* Other brackets simply allocate the next entry. */
6124 private_data_ptr = PRIVATE_DATA(ccbegin);
6125 SLJIT_ASSERT(private_data_ptr != 0);
6126 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6127 if (opcode == OP_ONCE)
6128 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE, &needs_control_head);
6129 }
6130
6131 /* Instructions before the first alternative. */
/* The required slots are counted first, allocated with a single call, and
then filled in the same order. */
6132 stacksize = 0;
6133 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6134 stacksize++;
6135 if (bra == OP_BRAZERO)
6136 stacksize++;
6137
6138 if (stacksize > 0)
6139 allocate_stack(common, stacksize);
6140
6141 stacksize = 0;
6142 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6143 {
6144 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6145 stacksize++;
6146 }
6147
6148 if (bra == OP_BRAZERO)
6149 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6150
6151 if (bra == OP_BRAMINZERO)
6152 {
6153 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
6154 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6155 if (ket != OP_KETRMIN)
6156 {
6157 free_stack(common, 1);
6158 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6159 }
6160 else
6161 {
6162 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6163 {
6164 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6165 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6166 /* Nothing stored during the first run. */
6167 skip = JUMP(SLJIT_JUMP);
6168 JUMPHERE(jump);
6169 /* Checking zero-length iteration. */
6170 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6171 {
6172 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6173 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6174 }
6175 else
6176 {
6177 /* Except when the whole stack frame must be saved. */
6178 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6179 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6180 }
6181 JUMPHERE(skip);
6182 }
6183 else
6184 {
6185 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6186 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6187 JUMPHERE(jump);
6188 }
6189 }
6190 }
6191
6192 if (ket == OP_KETRMIN)
6193 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6194
6195 if (ket == OP_KETRMAX)
6196 {
/* rmaxlabel is the target of the loop-back jump emitted near the end of
this function. */
6197 rmaxlabel = LABEL();
6198 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
6199 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
6200 }
6201
6202 /* Handling capturing brackets and alternatives. */
6203 if (opcode == OP_ONCE)
6204 {
6205 stacksize = 0;
6206 if (needs_control_head)
6207 {
6208 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6209 stacksize++;
6210 }
6211
6212 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6213 {
6214 /* Neither capturing brackets nor recursions are found in the block. */
6215 if (ket == OP_KETRMIN)
6216 {
6217 stacksize += 2;
6218 if (!needs_control_head)
6219 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6220 }
6221 else
6222 {
6223 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6224 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6225 if (ket == OP_KETRMAX || has_alternatives)
6226 stacksize++;
6227 }
6228
6229 if (stacksize > 0)
6230 allocate_stack(common, stacksize);
6231
6232 stacksize = 0;
6233 if (needs_control_head)
6234 {
6235 stacksize++;
6236 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6237 }
6238
6239 if (ket == OP_KETRMIN)
6240 {
/* TMP2 held the control head until it was stored above, so the value of
private_data_ptr can only be loaded into it now. */
6241 if (needs_control_head)
6242 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6243 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6244 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6245 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
6246 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6247 }
6248 else if (ket == OP_KETRMAX || has_alternatives)
6249 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6250 }
6251 else
6252 {
/* A frame is needed: the frame itself, the previous value of
private_data_ptr, and the optional control head / STR_PTR slots are
allocated together. */
6253 if (ket != OP_KET || has_alternatives)
6254 stacksize++;
6255
6256 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
6257 allocate_stack(common, stacksize);
6258
6259 if (needs_control_head)
6260 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6261
6262 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6263 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6264
6265 stacksize = needs_control_head ? 1 : 0;
6266 if (ket != OP_KET || has_alternatives)
6267 {
6268 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6269 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6270 stacksize++;
6271 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6272 }
6273 else
6274 {
6275 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6276 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6277 }
6278 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
6279 }
6280 }
6281 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
6282 {
6283 /* Saving the previous values. */
6284 if (common->optimized_cbracket[offset >> 1] != 0)
6285 {
6286 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
6287 allocate_stack(common, 2);
6288 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6289 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
6290 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6291 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6292 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6293 }
6294 else
6295 {
6296 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6297 allocate_stack(common, 1);
6298 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6299 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6300 }
6301 }
6302 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6303 {
6304 /* Saving the previous value. */
6305 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6306 allocate_stack(common, 1);
6307 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6308 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6309 }
6310 else if (has_alternatives)
6311 {
6312 /* Pushing the starting string pointer. */
6313 allocate_stack(common, 1);
6314 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6315 }
6316
6317 /* Generating code for the first alternative. */
/* For conditional groups the condition is compiled first; a failing
condition jumps to the u.condfailed list of the backtrack. */
6318 if (opcode == OP_COND || opcode == OP_SCOND)
6319 {
6320 if (*matchingpath == OP_CREF)
6321 {
6322 SLJIT_ASSERT(has_alternatives);
6323 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
6324 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6325 matchingpath += 1 + IMM2_SIZE;
6326 }
6327 else if (*matchingpath == OP_NCREF)
6328 {
6329 SLJIT_ASSERT(has_alternatives);
/* stacksize is used as scratch storage for the group number. The direct
check comes first; do_searchovector is only called when it fails. */
6330 stacksize = GET2(matchingpath, 1);
6331 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6332
/* STACK_TOP is saved in POSSESSIVE1 around the call and reloaded after it. */
6333 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6334 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6335 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6336 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
6337 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6338 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6339 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
6340 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6341 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6342
6343 JUMPHERE(jump);
6344 matchingpath += 1 + IMM2_SIZE;
6345 }
6346 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
6347 {
6348 /* Never has other case. */
6349 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
6350
/* stacksize is turned into a compile-time truth value of the condition. */
6351 stacksize = GET2(matchingpath, 1);
6352 if (common->currententry == NULL)
6353 stacksize = 0;
6354 else if (stacksize == RREF_ANY)
6355 stacksize = 1;
6356 else if (common->currententry->start == 0)
6357 stacksize = stacksize == 0;
6358 else
6359 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6360
6361 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
6362 {
6363 SLJIT_ASSERT(!has_alternatives);
/* The result is known here, so only the selected branch is compiled:
the first one when the condition holds, otherwise the one after OP_ALT
(or nothing if there is no OP_ALT). */
6364 if (stacksize != 0)
6365 matchingpath += 1 + IMM2_SIZE;
6366 else
6367 {
6368 if (*cc == OP_ALT)
6369 {
6370 matchingpath = cc + 1 + LINK_SIZE;
6371 cc += GET(cc, 1);
6372 }
6373 else
6374 matchingpath = cc;
6375 }
6376 }
6377 else
6378 {
6379 SLJIT_ASSERT(has_alternatives);
6380
6381 stacksize = GET2(matchingpath, 1);
6382 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6383 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6384 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6385 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
6386 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
6387 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6388 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6389 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
6390 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6391 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6392 matchingpath += 1 + IMM2_SIZE;
6393 }
6394 }
6395 else
6396 {
6397 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
6398 /* Similar code as PUSH_BACKTRACK macro. */
6399 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
6400 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6401 return NULL;
6402 memset(assert, 0, sizeof(assert_backtrack));
6403 assert->common.cc = matchingpath;
6404 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
6405 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
6406 }
6407 }
6408
/* Compile the body of the selected alternative: from matchingpath up to cc. */
6409 compile_matchingpath(common, matchingpath, cc, backtrack);
6410 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6411 return NULL;
6412
6413 if (opcode == OP_ONCE)
6414 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
6415
/* Instructions after the alternative has matched. As before, the slots are
counted, allocated once, and then filled in the same order. */
6416 stacksize = 0;
6417 if (ket != OP_KET || bra != OP_BRA)
6418 stacksize++;
6419 if (offset != 0)
6420 {
6421 if (common->capture_last_ptr != 0)
6422 stacksize++;
6423 if (common->optimized_cbracket[offset >> 1] == 0)
6424 stacksize += 2;
6425 }
6426 if (has_alternatives && opcode != OP_ONCE)
6427 stacksize++;
6428
6429 if (stacksize > 0)
6430 allocate_stack(common, stacksize);
6431
6432 stacksize = 0;
6433 if (ket != OP_KET || bra != OP_BRA)
6434 {
6435 if (ket != OP_KET)
6436 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6437 else
6438 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6439 stacksize++;
6440 }
6441
6442 if (offset != 0)
6443 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
6444
6445 if (has_alternatives)
6446 {
/* The value 0 is stored for this first alternative. */
6447 if (opcode != OP_ONCE)
6448 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6449 if (ket != OP_KETRMAX)
6450 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6451 }
6452
6453 /* Must be after the matchingpath label. */
6454 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6455 {
6456 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6457 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6458 }
6459
6460 if (ket == OP_KETRMAX)
6461 {
6462 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6463 {
6464 if (has_alternatives)
6465 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6466 /* Checking zero-length iteration. */
6467 if (opcode != OP_ONCE)
6468 {
6469 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
6470 /* Drop STR_PTR for greedy plus quantifier. */
6471 if (bra != OP_BRAZERO)
6472 free_stack(common, 1);
6473 }
6474 else
6475 /* TMP2 must contain the starting STR_PTR. */
6476 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
6477 }
6478 else
6479 JUMPTO(SLJIT_JUMP, rmaxlabel);
6480 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6481 }
6482
6483 if (bra == OP_BRAZERO)
6484 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
6485
6486 if (bra == OP_BRAMINZERO)
6487 {
6488 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
/* NOTE(review): parent is cast to braminzero_backtrack here, so the caller
is presumably expected to pass one whenever bra is OP_BRAMINZERO - confirm. */
6489 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6490 if (braminzerojump != NULL)
6491 {
6492 JUMPHERE(braminzerojump);
6493 /* We need to release the end pointer to perform the
6494 backtrack for the zero-length iteration. When
6495 framesize is < 0, OP_ONCE will do the release itself. */
6496 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
6497 {
6498 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6499 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6500 }
6501 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6502 free_stack(common, 1);
6503 }
6504 /* Continue to the normal backtrack. */
6505 }
6506
6507 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
6508 decrease_call_count(common);
6509
6510 /* Skip the other alternatives. */
6511 while (*cc == OP_ALT)
6512 cc += GET(cc, 1);
6513 cc += 1 + LINK_SIZE;
6514
6515 /* Temporarily encoding the needs_control_head in framesize. */
/* The framesize is shifted left by one and the flag is kept in the lowest bit. */
6516 if (opcode == OP_ONCE)
6517 BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
6518 return cc;
6519 }
6520
6521 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6522 {
6523 DEFINE_COMPILER;
6524 backtrack_common *backtrack;
6525 pcre_uchar opcode;
6526 int private_data_ptr;
6527 int cbraprivptr = 0;
6528 BOOL needs_control_head;
6529 int framesize;
6530 int stacksize;
6531 int offset = 0;
6532 BOOL zero = FALSE;
6533 pcre_uchar *ccbegin = NULL;
6534 int stack; /* Also contains the offset of control head. */
6535 struct sljit_label *loop = NULL;
6536 struct jump_list *emptymatch = NULL;
6537
6538 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
6539 if (*cc == OP_BRAPOSZERO)
6540 {
6541 zero = TRUE;
6542 cc++;
6543 }
6544
6545 opcode = *cc;
6546 private_data_ptr = PRIVATE_DATA(cc);
6547 SLJIT_ASSERT(private_data_ptr != 0);
6548 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
6549 switch(opcode)
6550 {
6551 case OP_BRAPOS:
6552 case OP_SBRAPOS:
6553 ccbegin = cc + 1 + LINK_SIZE;
6554 break;
6555
6556 case OP_CBRAPOS:
6557 case OP_SCBRAPOS:
6558 offset = GET2(cc, 1 + LINK_SIZE);
6559 /* This case cannot be optimized in the same way as
6560 normal capturing brackets. */
6561 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6562 cbraprivptr = OVECTOR_PRIV(offset);
6563 offset <<= 1;
6564 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6565 break;
6566
6567 default:
6568 SLJIT_ASSERT_STOP();
6569 break;
6570 }
6571
6572 framesize = get_framesize(common, cc, FALSE, &needs_control_head);
6573 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6574 if (framesize < 0)
6575 {
6576 if (offset != 0)
6577 {
6578 stacksize = 2;
6579 if (common->capture_last_ptr != 0)
6580 stacksize++;
6581 }
6582 else
6583 stacksize = 1;
6584
6585 if (needs_control_head)
6586 stacksize++;
6587 if (!zero)
6588 stacksize++;
6589
6590 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6591 allocate_stack(common, stacksize);
6592 if (framesize == no_frame)
6593 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6594
6595 stack = 0;
6596 if (offset != 0)
6597 {
6598 stack = 2;
6599 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6600 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6601 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6602 if (common->capture_last_ptr != 0)
6603 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6604 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6605 if (needs_control_head)
6606 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6607 if (common->capture_last_ptr != 0)
6608 {
6609 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6610 stack = 3;
6611 }
6612 }
6613 else
6614 {
6615 if (needs_control_head)
6616 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6617 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6618 stack = 1;
6619 }
6620
6621 if (needs_control_head)
6622 stack++;
6623 if (!zero)
6624 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
6625 if (needs_control_head)
6626 {
6627 stack--;
6628 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
6629 }
6630 }
6631 else
6632 {
6633 stacksize = framesize + 1;
6634 if (!zero)
6635 stacksize++;
6636 if (needs_control_head)
6637 stacksize++;
6638 if (offset == 0)
6639 stacksize++;
6640 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6641
6642 allocate_stack(common, stacksize);
6643 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6644 if (needs_control_head)
6645 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6646 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
6647
6648 stack = 0;
6649 if (!zero)
6650 {
6651 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
6652 stack = 1;
6653 }
6654 if (needs_control_head)
6655 {
6656 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
6657 stack++;
6658 }
6659 if (offset == 0)
6660 {
6661 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
6662 stack++;
6663 }
6664 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
6665 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
6666 stack -= 1 + (offset == 0);
6667 }
6668
6669 if (offset != 0)
6670 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6671
6672 loop = LABEL();
6673 while (*cc != OP_KETRPOS)
6674 {
6675 backtrack->top = NULL;
6676 backtrack->topbacktracks = NULL;
6677 cc += GET(cc, 1);
6678
6679 compile_matchingpath(common, ccbegin, cc, backtrack);
6680 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6681 return NULL;
6682
6683 if (framesize < 0)
6684 {
6685 if (framesize == no_frame)
6686 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6687
6688 if (offset != 0)
6689 {
6690 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6691 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6692 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6693 if (common->capture_last_ptr != 0)
6694 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6695 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6696 }
6697 else
6698 {
6699 if (opcode == OP_SBRAPOS)
6700 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6701 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6702 }
6703
6704 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6705 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6706
6707 if (!zero)
6708 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6709 }
6710 else
6711 {
6712 if (offset != 0)
6713 {
6714 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6715 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6716 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6717 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6718 if (common->capture_last_ptr != 0)
6719 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6720 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6721 }
6722 else
6723 {
6724 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6725 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6726 if (opcode == OP_SBRAPOS)
6727 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6728 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
6729 }
6730
6731 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6732 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6733
6734 if (!zero)
6735 {
6736 if (framesize < 0)
6737 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6738 else
6739 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6740 }
6741 }
6742
6743