/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1270 - (show annotations)
Tue Mar 5 08:05:17 2013 UTC (6 years, 7 months ago) by zherczeg
File MIME type: text/plain
File size: 278934 byte(s)
Error occurred while calculating annotation data.
Fix callout issues revealed by 64 bit big endian systems.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* Allocate memory for the regex stack on the real machine stack.
75 Fast, but limited size. */
76 #define MACHINE_STACK_SIZE 32768
77
78 /* Growth rate for stack allocated by the OS. Should be a multiple
79 of the page size. */
80 #define STACK_GROWTH_RATE 8192
81
82 /* Enable to check that the allocation could destroy temporaries. */
83 #if defined SLJIT_DEBUG && SLJIT_DEBUG
84 #define DESTROY_REGISTERS 1
85 #endif
86
87 /*
88 Short summary about the backtracking mechanism employed by the jit code generator:
89
90 The code generator follows the recursive nature of the PERL compatible regular
91 expressions. The basic blocks of regular expressions are condition checkers
92 which execute different commands depending on the result of the condition check.
93 The relationship between the operators can be horizontal (concatenation) and
94 vertical (sub-expression) (See struct backtrack_common for more details).
95
96 'ab' - 'a' and 'b' regexps are concatenated
97 'a+' - 'a' is the sub-expression of the '+' operator
98
99 The condition checkers are boolean (true/false) checkers. Machine code is generated
100 for the checker itself and for the actions depending on the result of the checker.
101 The 'true' case is called the matching path (expected path), and the other is called
102 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
103 branches on the matching path.
104
105 Greedy star operator (*) :
106 Matching path: match happens.
107 Backtrack path: match failed.
108 Non-greedy star operator (*?) :
109 Matching path: no need to perform a match.
110 Backtrack path: match is required.
111
112 The following example shows the code generated for a capturing bracket
113 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
114 suppose we have the following regular expression:
115
116 A(B|C)D
117
118 The generated code will be the following:
119
120 A matching path
121 '(' matching path (pushing arguments to the stack)
122 B matching path
123 ')' matching path (pushing arguments to the stack)
124 D matching path
125 return with successful match
126
127 D backtrack path
128 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
129 B backtrack path
130 C expected path
131 jump to D matching path
132 C backtrack path
133 A backtrack path
134
135 Notice that the order of the backtrack code paths is the opposite of the fast
136 code paths. In this way the topmost value on the stack always belongs
137 to the current backtrack code path. The backtrack path must check
138 whether there is a next alternative. If so, it needs to jump back to
139 the matching path eventually. Otherwise it needs to clear out its own stack
140 frame and continue the execution on the backtrack code paths.
141 */
142
143 /*
144 Saved stack frames:
145
146 Atomic blocks and asserts require reloading the values of private data
147 when backtracking is performed. Because of OP_RECURSE, the data
148 are not necessarily known at compile time, thus we need a dynamic restore
149 mechanism.
150
151 The stack frames are stored in a chain list, and have the following format:
152 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
153
154 Thus we can restore the private data to a particular point in the stack.
155 */
156
/* Argument block handed to generated code: a jit_function receives a pointer
to one of these. Pointer members are grouped ahead of the integer and
byte-sized members. NOTE(review): the meaning of the individual members is
not visible in this part of the file - confirm against the code that fills
the block in. */
157 typedef struct jit_arguments {
158 /* Pointers first. */
159 struct sljit_stack *stack;
160 const pcre_uchar *str;
161 const pcre_uchar *begin;
162 const pcre_uchar *end;
163 int *offsets;
164 pcre_uchar *uchar_ptr;
165 pcre_uchar *mark_ptr;
166 void *callout_data;
167 /* Everything else after. */
168 int real_offset_count;
169 int offset_count;
170 int call_limit;
171 pcre_uint8 notbol;
172 pcre_uint8 noteol;
173 pcre_uint8 notempty;
174 pcre_uint8 notempty_atstart;
175 } jit_arguments;
176
/* Record of generated code. executable_funcs and executable_sizes are
parallel arrays with one slot per compile mode
(JIT_NUMBER_OF_COMPILE_MODES entries each). NOTE(review): presumably
callback/userdata belong to the user supplied jit stack callback - confirm
against the code that reads them. */
177 typedef struct executable_functions {
178 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
179 PUBL(jit_callback) callback;
180 void *userdata;
181 pcre_uint32 top_bracket;
182 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
183 } executable_functions;
184
/* Node of a singly linked list of sljit jumps. Many members of
compiler_common and of the backtrack structures are heads of such lists. */
185 typedef struct jump_list {
186 struct sljit_jump *jump;
187 struct jump_list *next;
188 } jump_list;
189
/* Node of a singly linked list (head: compiler_common.stubs) pairing a jump
("start") with a label ("quit"). NOTE(review): presumably the jump enters
out-of-line stub code and the label is where the stub returns to - confirm
against the code that builds the list. */
190 typedef struct stub_list {
191 struct sljit_jump *start;
192 struct sljit_label *quit;
193 struct stub_list *next;
194 } stub_list;
195
/* Negative sentinels returned by get_framesize when no frame is needed
(that function is documented as returning a frame_types value, always < 0,
in that case). */
196 enum frame_types { no_frame = -1, no_stack = -2 };
197
/* Signature of a generated matcher entry point: it takes a pointer to a
jit_arguments block and returns an int, using the SLJIT_CALL calling
convention. */
198 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
199
200 /* The following structure is the key data type for the recursive
201 code generator. It is allocated by compile_matchingpath, and contains
202 the arguments for compile_backtrackingpath. Must be the first member
203 of its descendants. */
204 typedef struct backtrack_common {
205 /* Concatenation stack. */
206 struct backtrack_common *prev;
207 jump_list *nextbacktracks;
208 /* Internal stack (for component operators). */
209 struct backtrack_common *top;
210 jump_list *topbacktracks;
211 /* Opcode pointer. */
212 pcre_uchar *cc;
213 } backtrack_common;
214
/* Backtrack record with extra state for assertions. It starts with a
backtrack_common member, as that structure requires of its descendants. */
215 typedef struct assert_backtrack {
216 backtrack_common common;
/* NOTE(review): presumably the jumps taken when the assertion's condition
fails - confirm against the code that fills the list. */
217 jump_list *condfailed;
218 /* Less than 0 (-1) if a frame is not needed. */
219 int framesize;
220 /* Points to our private memory word on the stack. */
221 int private_data_ptr;
222 /* For iterators. */
223 struct sljit_label *matchingpath;
224 } assert_backtrack;
225
/* Backtrack record with extra state for brackets. It starts with a
backtrack_common member, as that structure requires of its descendants. */
226 typedef struct bracket_backtrack {
227 backtrack_common common;
228 /* Where to continue if an alternative is successfully matched. */
229 struct sljit_label *alternative_matchingpath;
230 /* For rmin and rmax iterators. */
231 struct sljit_label *recursive_matchingpath;
232 /* For greedy ? operator. */
233 struct sljit_label *zero_matchingpath;
234 /* Contains the branches of a failed condition. */
/* The members of this union share storage, so only one of them is
meaningful for a given bracket. */
235 union {
236 /* Both for OP_COND, OP_SCOND. */
237 jump_list *condfailed;
238 assert_backtrack *assert;
239 /* For OP_ONCE. -1 if not needed. */
240 int framesize;
241 } u;
242 /* Points to our private memory word on the stack. */
243 int private_data_ptr;
244 } bracket_backtrack;
245
/* Backtrack record with its own stack bookkeeping. It starts with a
backtrack_common member, as that structure requires of its descendants.
NOTE(review): judging by the name this serves the possessive bracket
opcodes (OP_BRAPOS and friends) - confirm against the code that uses it. */
246 typedef struct bracketpos_backtrack {
247 backtrack_common common;
248 /* Points to our private memory word on the stack. */
249 int private_data_ptr;
250 /* Reverting stack is needed. */
251 int framesize;
252 /* Allocated stack size. */
253 int stacksize;
254 } bracketpos_backtrack;
255
/* Backtrack record that adds one label to backtrack_common, which is its
first member as that structure requires. NOTE(review): judging by the name
this serves OP_BRAMINZERO - confirm against the code that uses it. */
256 typedef struct braminzero_backtrack {
257 backtrack_common common;
258 struct sljit_label *matchingpath;
259 } braminzero_backtrack;
260
/* Backtrack record for iterators: backtrack_common (first member, as that
structure requires) plus the label where the next iteration starts. */
261 typedef struct iterator_backtrack {
262 backtrack_common common;
263 /* Next iteration. */
264 struct sljit_label *matchingpath;
265 } iterator_backtrack;
266
/* Node of a singly linked list of recursion targets; compiler_common keeps
the list head in "entries" and a "currententry" pointer of the same type. */
267 typedef struct recurse_entry {
268 struct recurse_entry *next;
269 /* Contains the function entry. */
270 struct sljit_label *entry;
271 /* Collects the calls until the function is not created. */
272 jump_list *calls;
273 /* Points to the starting opcode. */
/* NOTE(review): this is an int, not a pointer - presumably an offset from
common->start; confirm against the code that sets it. */
274 int start;
275 } recurse_entry;
276
/* Backtrack record that adds one flag to backtrack_common, which is its
first member as that structure requires. NOTE(review): judging by the names
this serves OP_RECURSE and the flag records whether the called pattern was
inlined - confirm against the code that sets it. */
277 typedef struct recurse_backtrack {
278 backtrack_common common;
279 BOOL inlined_pattern;
280 } recurse_backtrack;
281
/* Sizes the digits[] member of compiler_common (2 + MAX_RANGE_SIZE
entries). */
282 #define MAX_RANGE_SIZE 6
283
/* State shared by the whole code generator; nearly every helper in this
file receives it as "common". The *_ptr / *_start / *_end int members hold
offsets, not pointers: get_private_data_length hands out slots for some of
them from ovector_start, and the OVECTOR / OVECTOR_PRIV macros add
multiples of sizeof(sljit_sw) to ovector_start and cbraptr. */
284 typedef struct compiler_common {
285 struct sljit_compiler *compiler;
286 pcre_uchar *start;
287
288 /* Maps private data offset to each opcode. */
/* Indexed by (opcode address - start), see PRIVATE_DATA(). */
289 int *private_data_ptrs;
290 /* Tells whether the capturing bracket is optimized. */
/* Indexed by capture group number; get_private_data_length stores 0 for
groups that are referenced or possessive. */
291 pcre_uint8 *optimized_cbracket;
292 /* Starting offset of private data for capturing brackets. */
293 int cbraptr;
294 /* OVector starting point. Must be divisible by 2. */
/* Advanced by sizeof(sljit_sw) each time get_private_data_length reserves
a slot for recursive_head_ptr, capture_last_ptr or mark_ptr. */
295 int ovector_start;
296 /* Last known position of the requested byte. */
297 int req_char_ptr;
298 /* Head of the last recursion. */
/* 0 until a slot is reserved on the first OP_RECURSE. */
299 int recursive_head_ptr;
300 /* First inspected character for partial matching. */
301 int start_used_ptr;
302 /* Starting pointer for partial soft matches. */
303 int hit_start;
304 /* End pointer of the first line. */
305 int first_line_end;
306 /* Points to the marked string. */
/* 0 until a slot is reserved on the first OP_MARK. */
307 int mark_ptr;
308 /* Points to the last matched capture block index. */
/* 0 until a slot is reserved on the first OP_CALLOUT. */
309 int capture_last_ptr;
310
311 /* Flipped and lower case tables. */
312 const pcre_uint8 *fcc;
313 sljit_sw lcc;
314 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
315 int mode;
316 /* Newline control. */
317 int nltype;
318 int newline;
319 int bsr_nltype;
320 /* Dollar endonly. */
321 int endonly;
/* Set to TRUE by get_private_data_length when OP_SET_SOM is seen. */
322 BOOL has_set_som;
323 /* Tables. */
324 sljit_sw ctypes;
325 int digits[2 + MAX_RANGE_SIZE];
326 /* Named capturing brackets. */
/* name_table is cast to a pcre_uchar pointer and walked in steps of
name_entry_size for name_count entries (see the OP_NCREF case of
get_private_data_length). */
327 sljit_uw name_table;
328 sljit_sw name_count;
329 sljit_sw name_entry_size;
330
331 /* Labels and jump lists. */
332 struct sljit_label *partialmatchlabel;
333 struct sljit_label *quit_label;
334 struct sljit_label *forced_quit_label;
335 struct sljit_label *accept_label;
336 stub_list *stubs;
337 recurse_entry *entries;
338 recurse_entry *currententry;
339 jump_list *partialmatch;
340 jump_list *quit;
341 jump_list *forced_quit;
342 jump_list *accept;
343 jump_list *calllimit;
344 jump_list *stackalloc;
345 jump_list *revertframes;
346 jump_list *wordboundary;
347 jump_list *anynewline;
348 jump_list *hspace;
349 jump_list *vspace;
350 jump_list *casefulcmp;
351 jump_list *caselesscmp;
352 BOOL jscript_compat;
/* The members below exist only in builds with the matching SUPPORT_* and
COMPILE_PCRE* settings. */
353 #ifdef SUPPORT_UTF
354 BOOL utf;
355 #ifdef SUPPORT_UCP
356 BOOL use_ucp;
357 #endif
358 #ifndef COMPILE_PCRE32
359 jump_list *utfreadchar;
360 #endif
361 #ifdef COMPILE_PCRE8
362 jump_list *utfreadtype8;
363 #endif
364 #endif /* SUPPORT_UTF */
365 #ifdef SUPPORT_UCP
366 jump_list *getucd;
367 #endif
368 } compiler_common;
369
370 /* For byte_sequence_compare. */
371
/* length and sourcereg are always present. The remaining members exist only
when SLJIT_UNALIGNED is set: c and oc are two identically laid out unions
that expose the same storage as an int, a 16 bit value, or as code units of
the configured width (4 x 8 bit, 2 x 16 bit or 1 x 32 bit).
NOTE(review): presumably c holds the characters to compare and oc their
other-case variants - confirm in byte_sequence_compare. */
372 typedef struct compare_context {
373 int length;
374 int sourcereg;
375 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
376 int ucharptr;
377 union {
378 sljit_si asint;
379 sljit_uh asushort;
380 #if defined COMPILE_PCRE8
381 sljit_ub asbyte;
382 sljit_ub asuchars[4];
383 #elif defined COMPILE_PCRE16
384 sljit_uh asuchars[2];
385 #elif defined COMPILE_PCRE32
386 sljit_ui asuchars[1];
387 #endif
388 } c;
389 union {
390 sljit_si asint;
391 sljit_uh asushort;
392 #if defined COMPILE_PCRE8
393 sljit_ub asbyte;
394 sljit_ub asuchars[4];
395 #elif defined COMPILE_PCRE16
396 sljit_uh asuchars[2];
397 #elif defined COMPILE_PCRE32
398 sljit_ui asuchars[1];
399 #endif
400 } oc;
401 #endif
402 } compare_context;
403
404 /* Undefine sljit macros. */
/* CMP is redefined further down as a shortcut around sljit_emit_cmp. */
405 #undef CMP
406
407 /* Used for accessing the elements of the stack. */
/* Yields a negative byte offset: STACK(0) is -sizeof(sljit_sw), STACK(1) is
-2 * sizeof(sljit_sw), and so on. NOTE(review): presumably applied relative
to STACK_TOP - confirm at the use sites. */
408 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
409
/* Symbolic names for the sljit registers used by the generated code. */
410 #define TMP1 SLJIT_SCRATCH_REG1
411 #define TMP2 SLJIT_SCRATCH_REG3
412 #define TMP3 SLJIT_TEMPORARY_EREG2
413 #define STR_PTR SLJIT_SAVED_REG1
414 #define STR_END SLJIT_SAVED_REG2
415 #define STACK_TOP SLJIT_SCRATCH_REG2
416 #define STACK_LIMIT SLJIT_SAVED_REG3
417 #define ARGUMENTS SLJIT_SAVED_EREG1
418 #define CALL_COUNT SLJIT_SAVED_EREG2
419 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
420
421 /* Local space layout. */
/* Each of the following is a byte offset of one sljit_sw sized slot. */
422 /* These two locals can be used by the current opcode. */
423 #define LOCALS0 (0 * sizeof(sljit_sw))
424 #define LOCALS1 (1 * sizeof(sljit_sw))
425 /* Two local variables for possessive quantifiers (char1 cannot use them). */
426 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
427 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
428 /* Max limit of recursions. */
429 #define CALL_LIMIT (4 * sizeof(sljit_sw))
430 /* The output vector is stored on the stack, and contains pointers
431 to characters. The vector data is divided into two groups: the first
432 group contains the start / end character pointers, and the second is
433 the start pointers when the end of the capturing group has not yet been reached. */
/* These four macros expand to expressions that use a variable named
"common", so they only work where a compiler_common pointer of that name
is in scope. */
434 #define OVECTOR_START (common->ovector_start)
435 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw))
436 #define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_sw))
437 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
438
/* Select the sljit move opcodes that match the code unit width of the
library being built (8, 16 or 32 bit); any other configuration is a build
error. */
439 #if defined COMPILE_PCRE8
440 #define MOV_UCHAR SLJIT_MOV_UB
441 #define MOVU_UCHAR SLJIT_MOVU_UB
442 #elif defined COMPILE_PCRE16
443 #define MOV_UCHAR SLJIT_MOV_UH
444 #define MOVU_UCHAR SLJIT_MOVU_UH
445 #elif defined COMPILE_PCRE32
446 #define MOV_UCHAR SLJIT_MOV_UI
447 #define MOVU_UCHAR SLJIT_MOVU_UI
448 #else
449 #error Unsupported compiling mode
450 #endif
451
452 /* Shortcuts. */
/* DEFINE_COMPILER declares a local "compiler" taken from common->compiler.
Every other shortcut below passes that local to an sljit call, so
DEFINE_COMPILER (or an equivalent declaration) has to come first in any
function that uses them. */
453 #define DEFINE_COMPILER \
454 struct sljit_compiler *compiler = common->compiler
455 #define OP1(op, dst, dstw, src, srcw) \
456 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
457 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
458 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
459 #define LABEL() \
460 sljit_emit_label(compiler)
461 #define JUMP(type) \
462 sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
463 #define JUMPTO(type, label) \
464 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Binds an earlier jump to a label emitted at the current position. */
465 #define JUMPHERE(jump) \
466 sljit_set_label((jump), sljit_emit_label(compiler))
467 #define SET_LABEL(jump, label) \
468 sljit_set_label((jump), (label))
469 #define CMP(type, src1, src1w, src2, src2w) \
470 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare-and-jump and binds it to an existing label. */
471 #define CMPTO(type, src1, src1w, src2, src2w, label) \
472 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
473 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
474 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
475 #define GET_LOCAL_BASE(dst, dstw, offset) \
476 sljit_get_local_base(compiler, (dst), (dstw), (offset))
477
478 static pcre_uchar* bracketend(pcre_uchar* cc)
479 {
480 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
481 do cc += GET(cc, 1); while (*cc == OP_ALT);
482 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
483 cc += 1 + LINK_SIZE;
484 return cc;
485 }
486
487 /* Functions which might need modification for every newly supported opcode:
488 next_opcode
489 get_private_data_length
490 set_private_data_ptrs
491 get_framesize
492 init_frame
493 get_private_data_length_for_copy
494 copy_private_data
495 compile_matchingpath
496 compile_backtrackingpath
497 */
498
/* Returns the address of the opcode that follows the one at cc, or NULL
when the opcode is not handled here (the default case, and OP_ANYBYTE when
common->utf is set). Callers in this file treat NULL as "unsupported
pattern": get_private_data_length returns -1, set_private_data_ptrs asserts
it cannot happen. common is only read in SUPPORT_UTF builds. */
499 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
500 {
501 SLJIT_UNUSED_ARG(common);
502 switch(*cc)
503 {
/* Opcodes whose length is taken directly from the PRIV(OP_lengths) table. */
504 case OP_SOD:
505 case OP_SOM:
506 case OP_SET_SOM:
507 case OP_NOT_WORD_BOUNDARY:
508 case OP_WORD_BOUNDARY:
509 case OP_NOT_DIGIT:
510 case OP_DIGIT:
511 case OP_NOT_WHITESPACE:
512 case OP_WHITESPACE:
513 case OP_NOT_WORDCHAR:
514 case OP_WORDCHAR:
515 case OP_ANY:
516 case OP_ALLANY:
517 case OP_NOTPROP:
518 case OP_PROP:
519 case OP_ANYNL:
520 case OP_NOT_HSPACE:
521 case OP_HSPACE:
522 case OP_NOT_VSPACE:
523 case OP_VSPACE:
524 case OP_EXTUNI:
525 case OP_EODN:
526 case OP_EOD:
527 case OP_CIRC:
528 case OP_CIRCM:
529 case OP_DOLL:
530 case OP_DOLLM:
531 case OP_CRSTAR:
532 case OP_CRMINSTAR:
533 case OP_CRPLUS:
534 case OP_CRMINPLUS:
535 case OP_CRQUERY:
536 case OP_CRMINQUERY:
537 case OP_CRRANGE:
538 case OP_CRMINRANGE:
539 case OP_CLASS:
540 case OP_NCLASS:
541 case OP_REF:
542 case OP_REFI:
543 case OP_RECURSE:
544 case OP_CALLOUT:
545 case OP_ALT:
546 case OP_KET:
547 case OP_KETRMAX:
548 case OP_KETRMIN:
549 case OP_KETRPOS:
550 case OP_REVERSE:
551 case OP_ASSERT:
552 case OP_ASSERT_NOT:
553 case OP_ASSERTBACK:
554 case OP_ASSERTBACK_NOT:
555 case OP_ONCE:
556 case OP_ONCE_NC:
557 case OP_BRA:
558 case OP_BRAPOS:
559 case OP_CBRA:
560 case OP_CBRAPOS:
561 case OP_COND:
562 case OP_SBRA:
563 case OP_SBRAPOS:
564 case OP_SCBRA:
565 case OP_SCBRAPOS:
566 case OP_SCOND:
567 case OP_CREF:
568 case OP_NCREF:
569 case OP_RREF:
570 case OP_NRREF:
571 case OP_DEF:
572 case OP_BRAZERO:
573 case OP_BRAMINZERO:
574 case OP_BRAPOSZERO:
575 case OP_COMMIT:
576 case OP_FAIL:
577 case OP_ACCEPT:
578 case OP_ASSERT_ACCEPT:
579 case OP_CLOSE:
580 case OP_SKIPZERO:
581 return cc + PRIV(OP_lengths)[*cc];
582
/* Opcodes that end with a literal character: the table length is used, and
in UTF mode the extra code units announced by the last unit covered by that
length (cc[-1] after the advance) are skipped as well. */
583 case OP_CHAR:
584 case OP_CHARI:
585 case OP_NOT:
586 case OP_NOTI:
587 case OP_STAR:
588 case OP_MINSTAR:
589 case OP_PLUS:
590 case OP_MINPLUS:
591 case OP_QUERY:
592 case OP_MINQUERY:
593 case OP_UPTO:
594 case OP_MINUPTO:
595 case OP_EXACT:
596 case OP_POSSTAR:
597 case OP_POSPLUS:
598 case OP_POSQUERY:
599 case OP_POSUPTO:
600 case OP_STARI:
601 case OP_MINSTARI:
602 case OP_PLUSI:
603 case OP_MINPLUSI:
604 case OP_QUERYI:
605 case OP_MINQUERYI:
606 case OP_UPTOI:
607 case OP_MINUPTOI:
608 case OP_EXACTI:
609 case OP_POSSTARI:
610 case OP_POSPLUSI:
611 case OP_POSQUERYI:
612 case OP_POSUPTOI:
613 case OP_NOTSTAR:
614 case OP_NOTMINSTAR:
615 case OP_NOTPLUS:
616 case OP_NOTMINPLUS:
617 case OP_NOTQUERY:
618 case OP_NOTMINQUERY:
619 case OP_NOTUPTO:
620 case OP_NOTMINUPTO:
621 case OP_NOTEXACT:
622 case OP_NOTPOSSTAR:
623 case OP_NOTPOSPLUS:
624 case OP_NOTPOSQUERY:
625 case OP_NOTPOSUPTO:
626 case OP_NOTSTARI:
627 case OP_NOTMINSTARI:
628 case OP_NOTPLUSI:
629 case OP_NOTMINPLUSI:
630 case OP_NOTQUERYI:
631 case OP_NOTMINQUERYI:
632 case OP_NOTUPTOI:
633 case OP_NOTMINUPTOI:
634 case OP_NOTEXACTI:
635 case OP_NOTPOSSTARI:
636 case OP_NOTPOSPLUSI:
637 case OP_NOTPOSQUERYI:
638 case OP_NOTPOSUPTOI:
639 cc += PRIV(OP_lengths)[*cc];
640 #ifdef SUPPORT_UTF
641 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
642 #endif
643 return cc;
644
645 /* Special cases. */
/* Type iterators advance by one unit less than their table length.
NOTE(review): presumably so that the result points at the trailing type
opcode; get_private_data_length advances these opcodes by 1 or by
1 + IMM2_SIZE, which appears to be the same distance - confirm against the
OP_lengths table. */
646 case OP_TYPESTAR:
647 case OP_TYPEMINSTAR:
648 case OP_TYPEPLUS:
649 case OP_TYPEMINPLUS:
650 case OP_TYPEQUERY:
651 case OP_TYPEMINQUERY:
652 case OP_TYPEUPTO:
653 case OP_TYPEMINUPTO:
654 case OP_TYPEEXACT:
655 case OP_TYPEPOSSTAR:
656 case OP_TYPEPOSPLUS:
657 case OP_TYPEPOSQUERY:
658 case OP_TYPEPOSUPTO:
659 return cc + PRIV(OP_lengths)[*cc] - 1;
660
/* OP_ANYBYTE is rejected (NULL) in UTF mode, otherwise it is one unit long. */
661 case OP_ANYBYTE:
662 #ifdef SUPPORT_UTF
663 if (common->utf) return NULL;
664 #endif
665 return cc + 1;
666
667 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
/* The total length of an extended class is stored in its own link field. */
668 case OP_XCLASS:
669 return cc + GET(cc, 1);
670 #endif
671
/* The advance is 1 + 2 + cc[1]. NOTE(review): presumably the opcode, a
length unit and a terminator around a name of cc[1] units - confirm. */
672 case OP_MARK:
673 return cc + 1 + 2 + cc[1];
674
/* Everything else is reported as unsupported. */
675 default:
676 return NULL;
677 }
678 }
679
/* Groups of case labels shared by get_private_data_length and
set_private_data_ptrs. The digit in each name is the number of private data
words ("space") those functions assign to the listed opcodes. The A and B
variants differ only in how far the opcode is advanced: the B opcodes are
longer by IMM2_SIZE. The TYPE variants list the OP_TYPE* forms of the same
iterators. */
680 #define CASE_ITERATOR_PRIVATE_DATA_1 \
681 case OP_MINSTAR: \
682 case OP_MINPLUS: \
683 case OP_QUERY: \
684 case OP_MINQUERY: \
685 case OP_MINSTARI: \
686 case OP_MINPLUSI: \
687 case OP_QUERYI: \
688 case OP_MINQUERYI: \
689 case OP_NOTMINSTAR: \
690 case OP_NOTMINPLUS: \
691 case OP_NOTQUERY: \
692 case OP_NOTMINQUERY: \
693 case OP_NOTMINSTARI: \
694 case OP_NOTMINPLUSI: \
695 case OP_NOTQUERYI: \
696 case OP_NOTMINQUERYI:
697
698 #define CASE_ITERATOR_PRIVATE_DATA_2A \
699 case OP_STAR: \
700 case OP_PLUS: \
701 case OP_STARI: \
702 case OP_PLUSI: \
703 case OP_NOTSTAR: \
704 case OP_NOTPLUS: \
705 case OP_NOTSTARI: \
706 case OP_NOTPLUSI:
707
708 #define CASE_ITERATOR_PRIVATE_DATA_2B \
709 case OP_UPTO: \
710 case OP_MINUPTO: \
711 case OP_UPTOI: \
712 case OP_MINUPTOI: \
713 case OP_NOTUPTO: \
714 case OP_NOTMINUPTO: \
715 case OP_NOTUPTOI: \
716 case OP_NOTMINUPTOI:
717
718 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
719 case OP_TYPEMINSTAR: \
720 case OP_TYPEMINPLUS: \
721 case OP_TYPEQUERY: \
722 case OP_TYPEMINQUERY:
723
/* For the two TYPE_..._2x groups the two words are skipped when the
iterated type is OP_ANYNL or OP_EXTUNI. */
724 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
725 case OP_TYPESTAR: \
726 case OP_TYPEPLUS:
727
728 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
729 case OP_TYPEUPTO: \
730 case OP_TYPEMINUPTO:
731
732 static int get_class_iterator_size(pcre_uchar *cc)
733 {
734 switch(*cc)
735 {
736 case OP_CRSTAR:
737 case OP_CRPLUS:
738 return 2;
739
740 case OP_CRMINSTAR:
741 case OP_CRMINPLUS:
742 case OP_CRQUERY:
743 case OP_CRMINQUERY:
744 return 1;
745
746 case OP_CRRANGE:
747 case OP_CRMINRANGE:
748 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
749 return 0;
750 return 2;
751
752 default:
753 return 0;
754 }
755 }
756
/* Scans the compiled pattern from cc up to ccend and returns the number of
bytes of private data it needs (always a multiple of sizeof(sljit_sw)), or
-1 when the pattern cannot be handled: next_opcode returned NULL, or an
OP_COND / OP_SCOND is immediately followed by OP_CALLOUT.

Side effects on common:
  - has_set_som is set when OP_SET_SOM is seen;
  - optimized_cbracket[n] is cleared for every group n that is referenced
    (OP_REF, OP_REFI, OP_CREF, OP_NCREF) or opened by OP_CBRAPOS /
    OP_SCBRAPOS;
  - recursive_head_ptr, capture_last_ptr and mark_ptr each get a slot taken
    from ovector_start the first time OP_RECURSE, OP_CALLOUT or OP_MARK is
    seen.

The opcode classification must stay in step with set_private_data_ptrs,
which hands out offsets for what is counted here. */
757 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
758 {
759 int private_data_length = 0;
760 pcre_uchar *alternative;
761 pcre_uchar *name;
/* End of the outermost enclosing bracket whose closing opcode is not a
plain OP_KET, or NULL when there is none. */
762 pcre_uchar *end = NULL;
763 int space, size, i;
764 pcre_uint32 bracketlen;
765
766 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
767 while (cc < ccend)
768 {
/* Per opcode results: space = private words wanted by an iterator,
size = how far to advance (a negative value additionally requests the UTF
extra length adjustment), bracketlen = header length of a bracket opcode.
Cases that advance cc themselves leave all three at 0. */
769 space = 0;
770 size = 0;
771 bracketlen = 0;
772 switch(*cc)
773 {
774 case OP_SET_SOM:
775 common->has_set_som = TRUE;
776 cc += 1;
777 break;
778
779 case OP_REF:
780 case OP_REFI:
781 common->optimized_cbracket[GET2(cc, 1)] = 0;
782 cc += 1 + IMM2_SIZE;
783 break;
784
/* These brackets always get one private word. */
785 case OP_ASSERT:
786 case OP_ASSERT_NOT:
787 case OP_ASSERTBACK:
788 case OP_ASSERTBACK_NOT:
789 case OP_ONCE:
790 case OP_ONCE_NC:
791 case OP_BRAPOS:
792 case OP_SBRA:
793 case OP_SBRAPOS:
794 private_data_length += sizeof(sljit_sw);
795 bracketlen = 1 + LINK_SIZE;
796 break;
797
798 case OP_CBRAPOS:
799 case OP_SCBRAPOS:
800 private_data_length += sizeof(sljit_sw);
801 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
802 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
803 break;
804
805 case OP_COND:
806 case OP_SCOND:
807 /* Only AUTO_CALLOUT can insert this opcode. We do
808 not intend to support this case. */
809 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
810 return -1;
811
/* OP_SCOND always gets a word; OP_COND only when its link leads to
OP_KETRMAX or OP_KETRMIN. */
812 if (*cc == OP_COND)
813 {
814 /* Might be a hidden SCOND. */
815 alternative = cc + GET(cc, 1);
816 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
817 private_data_length += sizeof(sljit_sw);
818 }
819 else
820 private_data_length += sizeof(sljit_sw);
821 bracketlen = 1 + LINK_SIZE;
822 break;
823
824 case OP_CREF:
825 i = GET2(cc, 1);
826 common->optimized_cbracket[i] = 0;
827 cc += 1 + IMM2_SIZE;
828 break;
829
830 case OP_NCREF:
/* bracketlen is borrowed as a scratch variable for the referenced group
number; it is reset to 0 before leaving this case. */
831 bracketlen = GET2(cc, 1);
832 name = (pcre_uchar *)common->name_table;
833 alternative = name;
/* First pass: find the name table entry whose group number matches. */
834 for (i = 0; i < common->name_count; i++)
835 {
836 if (GET2(name, 0) == bracketlen) break;
837 name += common->name_entry_size;
838 }
839 SLJIT_ASSERT(i != common->name_count);
840
/* Second pass: clear the flag of every group that carries the same name. */
841 for (i = 0; i < common->name_count; i++)
842 {
843 if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
844 common->optimized_cbracket[GET2(alternative, 0)] = 0;
845 alternative += common->name_entry_size;
846 }
847 bracketlen = 0;
848 cc += 1 + IMM2_SIZE;
849 break;
850
851 case OP_BRA:
852 bracketlen = 1 + LINK_SIZE;
853 break;
854
855 case OP_CBRA:
856 case OP_SCBRA:
857 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
858 break;
859
860 CASE_ITERATOR_PRIVATE_DATA_1
861 space = 1;
862 size = -2;
863 break;
864
865 CASE_ITERATOR_PRIVATE_DATA_2A
866 space = 2;
867 size = -2;
868 break;
869
870 CASE_ITERATOR_PRIVATE_DATA_2B
871 space = 2;
872 size = -(2 + IMM2_SIZE);
873 break;
874
875 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
876 space = 1;
877 size = 1;
878 break;
879
880 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
881 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
882 space = 2;
883 size = 1;
884 break;
885
886 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
887 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
888 space = 2;
889 size = 1 + IMM2_SIZE;
890 break;
891
/* A class is the opcode plus a 32 byte bitmap; the space depends on the
repeat opcode (if any) that follows it. */
892 case OP_CLASS:
893 case OP_NCLASS:
894 size += 1 + 32 / sizeof(pcre_uchar);
895 space = get_class_iterator_size(cc + size);
896 break;
897
898 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
899 case OP_XCLASS:
900 size = GET(cc, 1);
901 space = get_class_iterator_size(cc + size);
902 break;
903 #endif
904
905 case OP_RECURSE:
906 /* Set its value only once. */
907 if (common->recursive_head_ptr == 0)
908 {
909 common->recursive_head_ptr = common->ovector_start;
910 common->ovector_start += sizeof(sljit_sw);
911 }
912 cc += 1 + LINK_SIZE;
913 break;
914
915 case OP_CALLOUT:
/* Reserved only once, like the recursion head above. */
916 if (common->capture_last_ptr == 0)
917 {
918 common->capture_last_ptr = common->ovector_start;
919 common->ovector_start += sizeof(sljit_sw);
920 }
921 cc += 2 + 2 * LINK_SIZE;
922 break;
923
924 case OP_MARK:
/* Reserved only once, like the recursion head above. */
925 if (common->mark_ptr == 0)
926 {
927 common->mark_ptr = common->ovector_start;
928 common->ovector_start += sizeof(sljit_sw);
929 }
930 cc += 1 + 2 + cc[1];
931 break;
932
933 default:
934 cc = next_opcode(common, cc);
935 if (cc == NULL)
936 return -1;
937 break;
938 }
939
/* Iterators only get private words when they are not inside the bracket
tracked by "end". */
940 if (space > 0 && cc >= end)
941 private_data_length += sizeof(sljit_sw) * space;
942
943 if (size != 0)
944 {
945 if (size < 0)
946 {
947 cc += -size;
948 #ifdef SUPPORT_UTF
949 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
950 #endif
951 }
952 else
953 cc += size;
954 }
955
956 if (bracketlen != 0)
957 {
/* Entering a bracket while outside any tracked one: remember where it ends
if its closing opcode is not OP_KET, otherwise track nothing. */
958 if (cc >= end)
959 {
960 end = bracketend(cc);
961 if (end[-1 - LINK_SIZE] == OP_KET)
962 end = NULL;
963 }
964 cc += bracketlen;
965 }
966 }
967 return private_data_length;
968 }
969
/* Walks the compiled pattern from common->start up to ccend and records, in
common->private_data_ptrs (indexed by opcode offset from common->start), the
private data offset of every opcode that needs one. Offsets are handed out
sequentially starting at private_data_ptr, sizeof(sljit_sw) bytes per word.
The opcode classification mirrors get_private_data_length, which computes
the total size of what is handed out here; the two must be kept in step.
An unsupported opcode is not expected at this point (asserted). */
970 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
971 {
972 pcre_uchar *cc = common->start;
973 pcre_uchar *alternative;
/* End of the outermost enclosing bracket whose closing opcode is not a
plain OP_KET, or NULL when there is none. */
974 pcre_uchar *end = NULL;
975 int space, size, bracketlen;
976
977 while (cc < ccend)
978 {
/* Per opcode results: space = private words wanted by an iterator,
size = how far to advance (a negative value additionally requests the UTF
extra length adjustment), bracketlen = header length of a bracket opcode. */
979 space = 0;
980 size = 0;
981 bracketlen = 0;
982 switch(*cc)
983 {
/* These brackets always get one private word. */
984 case OP_ASSERT:
985 case OP_ASSERT_NOT:
986 case OP_ASSERTBACK:
987 case OP_ASSERTBACK_NOT:
988 case OP_ONCE:
989 case OP_ONCE_NC:
990 case OP_BRAPOS:
991 case OP_SBRA:
992 case OP_SBRAPOS:
993 case OP_SCOND:
994 common->private_data_ptrs[cc - common->start] = private_data_ptr;
995 private_data_ptr += sizeof(sljit_sw);
996 bracketlen = 1 + LINK_SIZE;
997 break;
998
999 case OP_CBRAPOS:
1000 case OP_SCBRAPOS:
1001 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1002 private_data_ptr += sizeof(sljit_sw);
1003 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1004 break;
1005
1006 case OP_COND:
1007 /* Might be a hidden SCOND. */
/* Only gets a word when its link leads to OP_KETRMAX or OP_KETRMIN. */
1008 alternative = cc + GET(cc, 1);
1009 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1010 {
1011 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1012 private_data_ptr += sizeof(sljit_sw);
1013 }
1014 bracketlen = 1 + LINK_SIZE;
1015 break;
1016
1017 case OP_BRA:
1018 bracketlen = 1 + LINK_SIZE;
1019 break;
1020
1021 case OP_CBRA:
1022 case OP_SCBRA:
1023 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1024 break;
1025
1026 CASE_ITERATOR_PRIVATE_DATA_1
1027 space = 1;
1028 size = -2;
1029 break;
1030
1031 CASE_ITERATOR_PRIVATE_DATA_2A
1032 space = 2;
1033 size = -2;
1034 break;
1035
1036 CASE_ITERATOR_PRIVATE_DATA_2B
1037 space = 2;
1038 size = -(2 + IMM2_SIZE);
1039 break;
1040
1041 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1042 space = 1;
1043 size = 1;
1044 break;
1045
1046 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1047 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1048 space = 2;
1049 size = 1;
1050 break;
1051
1052 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1053 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1054 space = 2;
1055 size = 1 + IMM2_SIZE;
1056 break;
1057
/* A class is the opcode plus a 32 byte bitmap; the space depends on the
repeat opcode (if any) that follows it. */
1058 case OP_CLASS:
1059 case OP_NCLASS:
1060 size += 1 + 32 / sizeof(pcre_uchar);
1061 space = get_class_iterator_size(cc + size);
1062 break;
1063
1064 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1065 case OP_XCLASS:
1066 size = GET(cc, 1);
1067 space = get_class_iterator_size(cc + size);
1068 break;
1069 #endif
1070
1071 default:
1072 cc = next_opcode(common, cc);
1073 SLJIT_ASSERT(cc != NULL);
1074 break;
1075 }
1076
/* Iterators only get an offset when they are not inside the bracket tracked
by "end". The offset is stored at the position of the iterator opcode
itself, because cc has not been advanced yet. */
1077 if (space > 0 && cc >= end)
1078 {
1079 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1080 private_data_ptr += sizeof(sljit_sw) * space;
1081 }
1082
1083 if (size != 0)
1084 {
1085 if (size < 0)
1086 {
1087 cc += -size;
1088 #ifdef SUPPORT_UTF
1089 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1090 #endif
1091 }
1092 else
1093 cc += size;
1094 }
1095
1096 if (bracketlen > 0)
1097 {
/* Entering a bracket while outside any tracked one: remember where it ends
if its closing opcode is not OP_KET, otherwise track nothing. */
1098 if (cc >= end)
1099 {
1100 end = bracketend(cc);
1101 if (end[-1 - LINK_SIZE] == OP_KET)
1102 end = NULL;
1103 }
1104 cc += bracketlen;
1105 }
1106 }
1107 }
1108
1109 /* Returns with a frame_types (always < 0) if no need for frame. */
1110 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1111 {
1112 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
1113 int length = 0;
1114 int possessive = 0;
1115 BOOL stack_restore = FALSE;
1116 BOOL setsom_found = recursive;
1117 BOOL setmark_found = recursive;
1118 /* The last capture is a local variable even for recursions. */
1119 BOOL capture_last_found = FALSE;
1120
1121 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1122 {
1123 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1124 /* This is correct regardless of common->capture_last_ptr. */
1125 capture_last_found = TRUE;
1126 }
1127
1128 cc = next_opcode(common, cc);
1129 SLJIT_ASSERT(cc != NULL);
1130 while (cc < ccend)
1131 switch(*cc)
1132 {
1133 case OP_SET_SOM:
1134 SLJIT_ASSERT(common->has_set_som);
1135 stack_restore = TRUE;
1136 if (!setsom_found)
1137 {
1138 length += 2;
1139 setsom_found = TRUE;
1140 }
1141 cc += 1;
1142 break;
1143
1144 case OP_MARK:
1145 SLJIT_ASSERT(common->mark_ptr != 0);
1146 stack_restore = TRUE;
1147 if (!setmark_found)
1148 {
1149 length += 2;
1150 setmark_found = TRUE;
1151 }
1152 cc += 1 + 2 + cc[1];
1153 break;
1154
1155 case OP_RECURSE:
1156 stack_restore = TRUE;
1157 if (common->has_set_som && !setsom_found)
1158 {
1159 length += 2;
1160 setsom_found = TRUE;
1161 }
1162 if (common->mark_ptr != 0 && !setmark_found)
1163 {
1164 length += 2;
1165 setmark_found = TRUE;
1166 }
1167 if (common->capture_last_ptr != 0 && !capture_last_found)
1168 {
1169 length += 2;
1170 capture_last_found = TRUE;
1171 }
1172 cc += 1 + LINK_SIZE;
1173 break;
1174
1175 case OP_CBRA:
1176 case OP_CBRAPOS:
1177 case OP_SCBRA:
1178 case OP_SCBRAPOS:
1179 stack_restore = TRUE;
1180 if (common->capture_last_ptr != 0 && !capture_last_found)
1181 {
1182 length += 2;
1183 capture_last_found = TRUE;
1184 }
1185 length += 3;
1186 cc += 1 + LINK_SIZE + IMM2_SIZE;
1187 break;
1188
1189 default:
1190 stack_restore = TRUE;
1191 /* Fall through. */
1192
1193 case OP_NOT_WORD_BOUNDARY:
1194 case OP_WORD_BOUNDARY:
1195 case OP_NOT_DIGIT:
1196 case OP_DIGIT:
1197 case OP_NOT_WHITESPACE:
1198 case OP_WHITESPACE:
1199 case OP_NOT_WORDCHAR:
1200 case OP_WORDCHAR:
1201 case OP_ANY:
1202 case OP_ALLANY:
1203 case OP_ANYBYTE:
1204 case OP_NOTPROP:
1205 case OP_PROP:
1206 case OP_ANYNL:
1207 case OP_NOT_HSPACE:
1208 case OP_HSPACE:
1209 case OP_NOT_VSPACE:
1210 case OP_VSPACE:
1211 case OP_EXTUNI:
1212 case OP_EODN:
1213 case OP_EOD:
1214 case OP_CIRC:
1215 case OP_CIRCM:
1216 case OP_DOLL:
1217 case OP_DOLLM:
1218 case OP_CHAR:
1219 case OP_CHARI:
1220 case OP_NOT:
1221 case OP_NOTI:
1222
1223 case OP_EXACT:
1224 case OP_POSSTAR:
1225 case OP_POSPLUS:
1226 case OP_POSQUERY:
1227 case OP_POSUPTO:
1228
1229 case OP_EXACTI:
1230 case OP_POSSTARI:
1231 case OP_POSPLUSI:
1232 case OP_POSQUERYI:
1233 case OP_POSUPTOI:
1234
1235 case OP_NOTEXACT:
1236 case OP_NOTPOSSTAR:
1237 case OP_NOTPOSPLUS:
1238 case OP_NOTPOSQUERY:
1239 case OP_NOTPOSUPTO:
1240
1241 case OP_NOTEXACTI:
1242 case OP_NOTPOSSTARI:
1243 case OP_NOTPOSPLUSI:
1244 case OP_NOTPOSQUERYI:
1245 case OP_NOTPOSUPTOI:
1246
1247 case OP_TYPEEXACT:
1248 case OP_TYPEPOSSTAR:
1249 case OP_TYPEPOSPLUS:
1250 case OP_TYPEPOSQUERY:
1251 case OP_TYPEPOSUPTO:
1252
1253 case OP_CLASS:
1254 case OP_NCLASS:
1255 case OP_XCLASS:
1256
1257 cc = next_opcode(common, cc);
1258 SLJIT_ASSERT(cc != NULL);
1259 break;
1260 }
1261
1262 /* Possessive quantifiers can use a special case. */
1263 if (SLJIT_UNLIKELY(possessive == length))
1264 return stack_restore ? no_frame : no_stack;
1265
1266 if (length > 0)
1267 return length + 1;
1268 return stack_restore ? no_frame : no_stack;
1269 }
1270
1271 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1272 {
1273 DEFINE_COMPILER;
1274 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
1275 BOOL setsom_found = recursive;
1276 BOOL setmark_found = recursive;
1277 /* The last capture is a local variable even for recursions. */
1278 BOOL capture_last_found = FALSE;
1279 int offset;
1280
1281 /* >= 1 + shortest item size (2) */
1282 SLJIT_UNUSED_ARG(stacktop);
1283 SLJIT_ASSERT(stackpos >= stacktop + 2);
1284
1285 stackpos = STACK(stackpos);
1286 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1287 cc = next_opcode(common, cc);
1288 SLJIT_ASSERT(cc != NULL);
1289 while (cc < ccend)
1290 switch(*cc)
1291 {
1292 case OP_SET_SOM:
1293 SLJIT_ASSERT(common->has_set_som);
1294 if (!setsom_found)
1295 {
1296 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1297 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1298 stackpos += (int)sizeof(sljit_sw);
1299 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1300 stackpos += (int)sizeof(sljit_sw);
1301 setsom_found = TRUE;
1302 }
1303 cc += 1;
1304 break;
1305
1306 case OP_MARK:
1307 SLJIT_ASSERT(common->mark_ptr != 0);
1308 if (!setmark_found)
1309 {
1310 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1311 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1312 stackpos += (int)sizeof(sljit_sw);
1313 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1314 stackpos += (int)sizeof(sljit_sw);
1315 setmark_found = TRUE;
1316 }
1317 cc += 1 + 2 + cc[1];
1318 break;
1319
1320 case OP_RECURSE:
1321 if (common->has_set_som && !setsom_found)
1322 {
1323 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1324 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1325 stackpos += (int)sizeof(sljit_sw);
1326 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1327 stackpos += (int)sizeof(sljit_sw);
1328 setsom_found = TRUE;
1329 }
1330 if (common->mark_ptr != 0 && !setmark_found)
1331 {
1332 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1333 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1334 stackpos += (int)sizeof(sljit_sw);
1335 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1336 stackpos += (int)sizeof(sljit_sw);
1337 setmark_found = TRUE;
1338 }
1339 if (common->capture_last_ptr != 0 && !capture_last_found)
1340 {
1341 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1342 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1343 stackpos += (int)sizeof(sljit_sw);
1344 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1345 stackpos += (int)sizeof(sljit_sw);
1346 capture_last_found = TRUE;
1347 }
1348 cc += 1 + LINK_SIZE;
1349 break;
1350
1351 case OP_CBRA:
1352 case OP_CBRAPOS:
1353 case OP_SCBRA:
1354 case OP_SCBRAPOS:
1355 if (common->capture_last_ptr != 0 && !capture_last_found)
1356 {
1357 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1358 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1359 stackpos += (int)sizeof(sljit_sw);
1360 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1361 stackpos += (int)sizeof(sljit_sw);
1362 capture_last_found = TRUE;
1363 }
1364 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1365 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1366 stackpos += (int)sizeof(sljit_sw);
1367 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1368 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1369 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1370 stackpos += (int)sizeof(sljit_sw);
1371 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1372 stackpos += (int)sizeof(sljit_sw);
1373
1374 cc += 1 + LINK_SIZE + IMM2_SIZE;
1375 break;
1376
1377 default:
1378 cc = next_opcode(common, cc);
1379 SLJIT_ASSERT(cc != NULL);
1380 break;
1381 }
1382
1383 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1384 SLJIT_ASSERT(stackpos == STACK(stacktop));
1385 }
1386
1387 static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1388 {
1389 int private_data_length = 2;
1390 int size;
1391 pcre_uchar *alternative;
1392 /* Calculate the sum of the private machine words. */
1393 while (cc < ccend)
1394 {
1395 size = 0;
1396 switch(*cc)
1397 {
1398 case OP_ASSERT:
1399 case OP_ASSERT_NOT:
1400 case OP_ASSERTBACK:
1401 case OP_ASSERTBACK_NOT:
1402 case OP_ONCE:
1403 case OP_ONCE_NC:
1404 case OP_BRAPOS:
1405 case OP_SBRA:
1406 case OP_SBRAPOS:
1407 case OP_SCOND:
1408 private_data_length++;
1409 cc += 1 + LINK_SIZE;
1410 break;
1411
1412 case OP_CBRA:
1413 case OP_SCBRA:
1414 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1415 private_data_length++;
1416 cc += 1 + LINK_SIZE + IMM2_SIZE;
1417 break;
1418
1419 case OP_CBRAPOS:
1420 case OP_SCBRAPOS:
1421 private_data_length += 2;
1422 cc += 1 + LINK_SIZE + IMM2_SIZE;
1423 break;
1424
1425 case OP_COND:
1426 /* Might be a hidden SCOND. */
1427 alternative = cc + GET(cc, 1);
1428 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1429 private_data_length++;
1430 cc += 1 + LINK_SIZE;
1431 break;
1432
1433 CASE_ITERATOR_PRIVATE_DATA_1
1434 if (PRIVATE_DATA(cc))
1435 private_data_length++;
1436 cc += 2;
1437 #ifdef SUPPORT_UTF
1438 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1439 #endif
1440 break;
1441
1442 CASE_ITERATOR_PRIVATE_DATA_2A
1443 if (PRIVATE_DATA(cc))
1444 private_data_length += 2;
1445 cc += 2;
1446 #ifdef SUPPORT_UTF
1447 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1448 #endif
1449 break;
1450
1451 CASE_ITERATOR_PRIVATE_DATA_2B
1452 if (PRIVATE_DATA(cc))
1453 private_data_length += 2;
1454 cc += 2 + IMM2_SIZE;
1455 #ifdef SUPPORT_UTF
1456 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1457 #endif
1458 break;
1459
1460 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1461 if (PRIVATE_DATA(cc))
1462 private_data_length++;
1463 cc += 1;
1464 break;
1465
1466 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1467 if (PRIVATE_DATA(cc))
1468 private_data_length += 2;
1469 cc += 1;
1470 break;
1471
1472 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1473 if (PRIVATE_DATA(cc))
1474 private_data_length += 2;
1475 cc += 1 + IMM2_SIZE;
1476 break;
1477
1478 case OP_CLASS:
1479 case OP_NCLASS:
1480 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1481 case OP_XCLASS:
1482 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1483 #else
1484 size = 1 + 32 / (int)sizeof(pcre_uchar);
1485 #endif
1486 if (PRIVATE_DATA(cc))
1487 private_data_length += get_class_iterator_size(cc + size);
1488 cc += size;
1489 break;
1490
1491 default:
1492 cc = next_opcode(common, cc);
1493 SLJIT_ASSERT(cc != NULL);
1494 break;
1495 }
1496 }
1497 SLJIT_ASSERT(cc == ccend);
1498 return private_data_length;
1499 }
1500
/* Emits code that transfers the private data (locals addressed through
SLJIT_LOCALS_REG) of every opcode in [cc, ccend) to or from the stack
addressed through STACK_TOP. When save is TRUE the locals are written to the
stack, otherwise they are read back from the stack into the locals. TMP1 and
TMP2 are used alternately as staging registers. stackptr and stacktop are
stack indexes; they are converted with STACK() before use. The opcode walk
mirrors get_private_data_length_for_copy. */
1501 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1502 BOOL save, int stackptr, int stacktop)
1503 {
1504 DEFINE_COMPILER;
/* Offsets of the locals owned by the current item (at most two). */
1505 int srcw[2];
1506 int count, size;
/* tmp1next selects the staging register used for the next word; the two
"empty" flags tell whether a register currently holds a pending value. */
1507 BOOL tmp1next = TRUE;
1508 BOOL tmp1empty = TRUE;
1509 BOOL tmp2empty = TRUE;
1510 pcre_uchar *alternative;
1511 enum {
1512 start,
1513 loop,
1514 end
1515 } status;
1516
/* Only the save direction goes through the start state, which handles
common->recursive_head_ptr. */
1517 status = save ? start : loop;
1518 stackptr = STACK(stackptr - 2);
1519 stacktop = STACK(stacktop - 1);
1520
1521 if (!save)
1522 {
/* NOTE(review): the restore direction skips one stack word here, presumably
the slot that the save direction fills from recursive_head_ptr - confirm
with the callers. */
1523 stackptr += sizeof(sljit_sw);
/* Preload up to two stack words, so the loop below finds the value it has
to store already sitting in a register. */
1524 if (stackptr < stacktop)
1525 {
1526 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1527 stackptr += sizeof(sljit_sw);
1528 tmp1empty = FALSE;
1529 }
1530 if (stackptr < stacktop)
1531 {
1532 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1533 stackptr += sizeof(sljit_sw);
1534 tmp2empty = FALSE;
1535 }
1536 /* The tmp1next must be TRUE in either way. */
1537 }
1538
1539 while (status != end)
1540 {
/* Each iteration collects in srcw[] the locals of one item (count may stay
zero), then transfers them in the inner while loop below. */
1541 count = 0;
1542 switch(status)
1543 {
1544 case start:
1545 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1546 count = 1;
1547 srcw[0] = common->recursive_head_ptr;
1548 status = loop;
1549 break;
1550
1551 case loop:
1552 if (cc >= ccend)
1553 {
1554 status = end;
1555 break;
1556 }
1557
/* Record the local offsets owned by the current opcode and step cc past
the opcode. */
1558 switch(*cc)
1559 {
1560 case OP_ASSERT:
1561 case OP_ASSERT_NOT:
1562 case OP_ASSERTBACK:
1563 case OP_ASSERTBACK_NOT:
1564 case OP_ONCE:
1565 case OP_ONCE_NC:
1566 case OP_BRAPOS:
1567 case OP_SBRA:
1568 case OP_SBRAPOS:
1569 case OP_SCOND:
1570 count = 1;
1571 srcw[0] = PRIVATE_DATA(cc);
1572 SLJIT_ASSERT(srcw[0] != 0);
1573 cc += 1 + LINK_SIZE;
1574 break;
1575
1576 case OP_CBRA:
1577 case OP_SCBRA:
/* Transferred only when the optimized_cbracket entry of the bracket is 0. */
1578 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1579 {
1580 count = 1;
1581 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1582 }
1583 cc += 1 + LINK_SIZE + IMM2_SIZE;
1584 break;
1585
1586 case OP_CBRAPOS:
1587 case OP_SCBRAPOS:
1588 count = 2;
1589 srcw[0] = PRIVATE_DATA(cc);
1590 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1591 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1592 cc += 1 + LINK_SIZE + IMM2_SIZE;
1593 break;
1594
1595 case OP_COND:
1596 /* Might be a hidden SCOND. */
1597 alternative = cc + GET(cc, 1);
1598 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1599 {
1600 count = 1;
1601 srcw[0] = PRIVATE_DATA(cc);
1602 SLJIT_ASSERT(srcw[0] != 0);
1603 }
1604 cc += 1 + LINK_SIZE;
1605 break;
1606
1607 CASE_ITERATOR_PRIVATE_DATA_1
1608 if (PRIVATE_DATA(cc))
1609 {
1610 count = 1;
1611 srcw[0] = PRIVATE_DATA(cc);
1612 }
1613 cc += 2;
1614 #ifdef SUPPORT_UTF
1615 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1616 #endif
1617 break;
1618
1619 CASE_ITERATOR_PRIVATE_DATA_2A
1620 if (PRIVATE_DATA(cc))
1621 {
/* Two-word items occupy two consecutive machine words in the locals. */
1622 count = 2;
1623 srcw[0] = PRIVATE_DATA(cc);
1624 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1625 }
1626 cc += 2;
1627 #ifdef SUPPORT_UTF
1628 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1629 #endif
1630 break;
1631
1632 CASE_ITERATOR_PRIVATE_DATA_2B
1633 if (PRIVATE_DATA(cc))
1634 {
1635 count = 2;
1636 srcw[0] = PRIVATE_DATA(cc);
1637 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1638 }
1639 cc += 2 + IMM2_SIZE;
1640 #ifdef SUPPORT_UTF
1641 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1642 #endif
1643 break;
1644
1645 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1646 if (PRIVATE_DATA(cc))
1647 {
1648 count = 1;
1649 srcw[0] = PRIVATE_DATA(cc);
1650 }
1651 cc += 1;
1652 break;
1653
1654 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1655 if (PRIVATE_DATA(cc))
1656 {
1657 count = 2;
1658 srcw[0] = PRIVATE_DATA(cc);
1659 srcw[1] = srcw[0] + sizeof(sljit_sw);
1660 }
1661 cc += 1;
1662 break;
1663
1664 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1665 if (PRIVATE_DATA(cc))
1666 {
1667 count = 2;
1668 srcw[0] = PRIVATE_DATA(cc);
1669 srcw[1] = srcw[0] + sizeof(sljit_sw);
1670 }
1671 cc += 1 + IMM2_SIZE;
1672 break;
1673
1674 case OP_CLASS:
1675 case OP_NCLASS:
1676 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1677 case OP_XCLASS:
1678 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1679 #else
1680 size = 1 + 32 / (int)sizeof(pcre_uchar);
1681 #endif
/* The number of words of a class is decided by the iterator that follows
it; only 1 or 2 are accepted here. */
1682 if (PRIVATE_DATA(cc))
1683 switch(get_class_iterator_size(cc + size))
1684 {
1685 case 1:
1686 count = 1;
1687 srcw[0] = PRIVATE_DATA(cc);
1688 break;
1689
1690 case 2:
1691 count = 2;
1692 srcw[0] = PRIVATE_DATA(cc);
1693 srcw[1] = srcw[0] + sizeof(sljit_sw);
1694 break;
1695
1696 default:
1697 SLJIT_ASSERT_STOP();
1698 break;
1699 }
1700 cc += size;
1701 break;
1702
1703 default:
1704 cc = next_opcode(common, cc);
1705 SLJIT_ASSERT(cc != NULL);
1706 break;
1707 }
1708 break;
1709
1710 case end:
1711 SLJIT_ASSERT_STOP();
1712 break;
1713 }
1714
/* Transfer the collected words; srcw[] is consumed from its last entry to
its first. */
1715 while (count > 0)
1716 {
1717 count--;
1718 if (save)
1719 {
/* Save: a register still holding an earlier value is flushed to the stack
before it is reloaded with the next local. */
1720 if (tmp1next)
1721 {
1722 if (!tmp1empty)
1723 {
1724 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1725 stackptr += sizeof(sljit_sw);
1726 }
1727 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1728 tmp1empty = FALSE;
1729 tmp1next = FALSE;
1730 }
1731 else
1732 {
1733 if (!tmp2empty)
1734 {
1735 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1736 stackptr += sizeof(sljit_sw);
1737 }
1738 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1739 tmp2empty = FALSE;
1740 tmp1next = TRUE;
1741 }
1742 }
1743 else
1744 {
/* Restore: the preloaded register is written to the local, then refilled
from the stack unless the end of the saved area has been reached. */
1745 if (tmp1next)
1746 {
1747 SLJIT_ASSERT(!tmp1empty);
1748 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1749 tmp1empty = stackptr >= stacktop;
1750 if (!tmp1empty)
1751 {
1752 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1753 stackptr += sizeof(sljit_sw);
1754 }
1755 tmp1next = FALSE;
1756 }
1757 else
1758 {
1759 SLJIT_ASSERT(!tmp2empty);
1760 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1761 tmp2empty = stackptr >= stacktop;
1762 if (!tmp2empty)
1763 {
1764 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1765 stackptr += sizeof(sljit_sw);
1766 }
1767 tmp1next = TRUE;
1768 }
1769 }
1770 }
1771 }
1772
/* Save direction: flush the values still pending in the registers. The
register that would be reused next holds the older value, so it is stored
first. */
1773 if (save)
1774 {
1775 if (tmp1next)
1776 {
1777 if (!tmp1empty)
1778 {
1779 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1780 stackptr += sizeof(sljit_sw);
1781 }
1782 if (!tmp2empty)
1783 {
1784 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1785 stackptr += sizeof(sljit_sw);
1786 }
1787 }
1788 else
1789 {
1790 if (!tmp2empty)
1791 {
1792 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1793 stackptr += sizeof(sljit_sw);
1794 }
1795 if (!tmp1empty)
1796 {
1797 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1798 stackptr += sizeof(sljit_sw);
1799 }
1800 }
1801 }
/* The whole opcode range and the whole stack area must have been consumed;
after a restore both staging registers must be drained. */
1802 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1803 }
1804
1805 #undef CASE_ITERATOR_PRIVATE_DATA_1
1806 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1807 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1808 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1809 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1810 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1811
1812 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1813 {
1814 return (value & (value - 1)) == 0;
1815 }
1816
1817 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1818 {
1819 while (list)
1820 {
1821 /* sljit_set_label is clever enough to do nothing
1822 if either the jump or the label is NULL. */
1823 SET_LABEL(list->jump, label);
1824 list = list->next;
1825 }
1826 }
1827
1828 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1829 {
1830 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1831 if (list_item)
1832 {
1833 list_item->next = *list;
1834 list_item->jump = jump;
1835 *list = list_item;
1836 }
1837 }
1838
1839 static void add_stub(compiler_common *common, struct sljit_jump *start)
1840 {
1841 DEFINE_COMPILER;
1842 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1843
1844 if (list_item)
1845 {
1846 list_item->start = start;
1847 list_item->quit = LABEL();
1848 list_item->next = common->stubs;
1849 common->stubs = list_item;
1850 }
1851 }
1852
1853 static void flush_stubs(compiler_common *common)
1854 {
1855 DEFINE_COMPILER;
1856 stub_list* list_item = common->stubs;
1857
1858 while (list_item)
1859 {
1860 JUMPHERE(list_item->start);
1861 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1862 JUMPTO(SLJIT_JUMP, list_item->quit);
1863 list_item = list_item->next;
1864 }
1865 common->stubs = NULL;
1866 }
1867
1868 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1869 {
1870 DEFINE_COMPILER;
1871
1872 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1873 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1874 }
1875
1876 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1877 {
1878 /* May destroy all locals and registers except TMP2. */
1879 DEFINE_COMPILER;
1880
1881 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1882 #ifdef DESTROY_REGISTERS
1883 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1884 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1885 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1886 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1887 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1888 #endif
1889 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1890 }
1891
1892 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1893 {
1894 DEFINE_COMPILER;
1895 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1896 }
1897
1898 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1899 {
1900 DEFINE_COMPILER;
1901 struct sljit_label *loop;
1902 int i;
1903 /* At this point we can freely use all temporary registers. */
1904 /* TMP1 returns with begin - 1. */
1905 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1906 if (length < 8)
1907 {
1908 for (i = 0; i < length; i++)
1909 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
1910 }
1911 else
1912 {
1913 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START - sizeof(sljit_sw));
1914 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length);
1915 loop = LABEL();
1916 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
1917 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
1918 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1919 }
1920 }
1921
1922 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1923 {
1924 DEFINE_COMPILER;
1925 struct sljit_label *loop;
1926 struct sljit_jump *early_quit;
1927
1928 /* At this point we can freely use all registers. */
1929 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1930 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1931
1932 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
1933 if (common->mark_ptr != 0)
1934 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1935 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
1936 if (common->mark_ptr != 0)
1937 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
1938 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1939 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1940 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1941 /* Unlikely, but possible */
1942 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
1943 loop = LABEL();
1944 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
1945 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
1946 /* Copy the integer value to the output buffer */
1947 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1948 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
1949 #endif
1950 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1951 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
1952 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1953 JUMPHERE(early_quit);
1954
1955 /* Calculate the return value, which is the maximum ovector value. */
1956 if (topbracket > 1)
1957 {
1958 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
1959 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
1960
1961 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1962 loop = LABEL();
1963 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
1964 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
1965 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1966 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
1967 }
1968 else
1969 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1970 }
1971
1972 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
1973 {
1974 DEFINE_COMPILER;
1975 struct sljit_jump *jump;
1976
1977 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1978 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1979
1980 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
1981 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1982 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
1983 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
1984
1985 /* Store match begin and end. */
1986 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1987 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1988
1989 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
1990 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr + sizeof(sljit_sw), SLJIT_SAVED_REG1, 0);
1991 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1992 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
1993 #endif
1994 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
1995 JUMPHERE(jump);
1996
1997 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1998 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1999 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2000 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2001 #endif
2002 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2003
2004 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2005 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2006 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2007 #endif
2008 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2009
2010 JUMPTO(SLJIT_JUMP, quit);
2011 }
2012
2013 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2014 {
2015 /* May destroy TMP1. */
2016 DEFINE_COMPILER;
2017 struct sljit_jump *jump;
2018
2019 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2020 {
2021 /* The value of -1 must be kept for start_used_ptr! */
2022 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2023 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2024 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2025 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2026 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2027 JUMPHERE(jump);
2028 }
2029 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2030 {
2031 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2032 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2033 JUMPHERE(jump);
2034 }
2035 }
2036
2037 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2038 {
2039 /* Detects if the character has an othercase. */
2040 unsigned int c;
2041
2042 #ifdef SUPPORT_UTF
2043 if (common->utf)
2044 {
2045 GETCHAR(c, cc);
2046 if (c > 127)
2047 {
2048 #ifdef SUPPORT_UCP
2049 return c != UCD_OTHERCASE(c);
2050 #else
2051 return FALSE;
2052 #endif
2053 }
2054 #ifndef COMPILE_PCRE8
2055 return common->fcc[c] != c;
2056 #endif
2057 }
2058 else
2059 #endif
2060 c = *cc;
2061 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2062 }
2063
2064 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2065 {
2066 /* Returns with the othercase. */
2067 #ifdef SUPPORT_UTF
2068 if (common->utf && c > 127)
2069 {
2070 #ifdef SUPPORT_UCP
2071 return UCD_OTHERCASE(c);
2072 #else
2073 return c;
2074 #endif
2075 }
2076 #endif
2077 return TABLE_GET(c, common->fcc, c);
2078 }
2079
2080 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2081 {
2082 /* Detects if the character and its othercase has only 1 bit difference. */
2083 unsigned int c, oc, bit;
2084 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2085 int n;
2086 #endif
2087
2088 #ifdef SUPPORT_UTF
2089 if (common->utf)
2090 {
2091 GETCHAR(c, cc);
2092 if (c <= 127)
2093 oc = common->fcc[c];
2094 else
2095 {
2096 #ifdef SUPPORT_UCP
2097 oc = UCD_OTHERCASE(c);
2098 #else
2099 oc = c;
2100 #endif
2101 }
2102 }
2103 else
2104 {
2105 c = *cc;
2106 oc = TABLE_GET(c, common->fcc, c);
2107 }
2108 #else
2109 c = *cc;
2110 oc = TABLE_GET(c, common->fcc, c);
2111 #endif
2112
2113 SLJIT_ASSERT(c != oc);
2114
2115 bit = c ^ oc;
2116 /* Optimized for English alphabet. */
2117 if (c <= 127 && bit == 0x20)
2118 return (0 << 8) | 0x20;
2119
2120 /* Since c != oc, they must have at least 1 bit difference. */
2121 if (!is_powerof2(bit))
2122 return 0;
2123
2124 #if defined COMPILE_PCRE8
2125
2126 #ifdef SUPPORT_UTF
2127 if (common->utf && c > 127)
2128 {
2129 n = GET_EXTRALEN(*cc);
2130 while ((bit & 0x3f) == 0)
2131 {
2132 n--;
2133 bit >>= 6;
2134 }
2135 return (n << 8) | bit;
2136 }
2137 #endif /* SUPPORT_UTF */
2138 return (0 << 8) | bit;
2139
2140 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2141
2142 #ifdef SUPPORT_UTF
2143 if (common->utf && c > 65535)
2144 {
2145 if (bit >= (1 << 10))
2146 bit >>= 10;
2147 else
2148 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2149 }
2150 #endif /* SUPPORT_UTF */
2151 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2152
2153 #endif /* COMPILE_PCRE[8|16|32] */
2154 }
2155
2156 static void check_partial(compiler_common *common, BOOL force)
2157 {
2158 /* Checks whether a partial matching is occured. Does not modify registers. */
2159 DEFINE_COMPILER;
2160 struct sljit_jump *jump = NULL;
2161
2162 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2163
2164 if (common->mode == JIT_COMPILE)
2165 return;
2166
2167 if (!force)
2168 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2169 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2170 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2171
2172 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2173 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2174 else
2175 {
2176 if (common->partialmatchlabel != NULL)
2177 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2178 else
2179 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2180 }
2181
2182 if (jump != NULL)
2183 JUMPHERE(jump);
2184 }
2185
2186 static void check_str_end(compiler_common *common, jump_list **end_reached)
2187 {
2188 /* Does not affect registers. Usually used in a tight spot. */
2189 DEFINE_COMPILER;
2190 struct sljit_jump *jump;
2191
2192 if (common->mode == JIT_COMPILE)
2193 {
2194 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2195 return;
2196 }
2197
2198 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2199 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2200 {
2201 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2202 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2203 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2204 }
2205 else
2206 {
2207 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2208 if (common->partialmatchlabel != NULL)
2209 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2210 else
2211 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2212 }
2213 JUMPHERE(jump);
2214 }
2215
2216 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2217 {
2218 DEFINE_COMPILER;
2219 struct sljit_jump *jump;
2220
2221 if (common->mode == JIT_COMPILE)
2222 {
2223 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2224 return;
2225 }
2226
2227 /* Partial matching mode. */
2228 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2229 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2230 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2231 {
2232 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2233 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2234 }
2235 else
2236 {
2237 if (common->partialmatchlabel != NULL)
2238 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2239 else
2240 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2241 }
2242 JUMPHERE(jump);
2243 }
2244
2245 static void read_char(compiler_common *common)
2246 {
2247 /* Reads the character into TMP1, updates STR_PTR.
2248 Does not check STR_END. TMP2 Destroyed. */
2249 DEFINE_COMPILER;
2250 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2251 struct sljit_jump *jump;
2252 #endif
2253
2254 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2255 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2256 if (common->utf)
2257 {
2258 #if defined COMPILE_PCRE8
2259 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2260 #elif defined COMPILE_PCRE16
2261 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2262 #endif /* COMPILE_PCRE[8|16] */
2263 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2264 JUMPHERE(jump);
2265 }
2266 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2267 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2268 }
2269
2270 static void peek_char(compiler_common *common)
2271 {
2272 /* Reads the character into TMP1, keeps STR_PTR.
2273 Does not check STR_END. TMP2 Destroyed. */
2274 DEFINE_COMPILER;
2275 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2276 struct sljit_jump *jump;
2277 #endif
2278
2279 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2280 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2281 if (common->utf)
2282 {
2283 #if defined COMPILE_PCRE8
2284 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2285 #elif defined COMPILE_PCRE16
2286 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2287 #endif /* COMPILE_PCRE[8|16] */
2288 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2289 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2290 JUMPHERE(jump);
2291 }
2292 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2293 }
2294
2295 static void read_char8_type(compiler_common *common)
2296 {
2297 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2298 DEFINE_COMPILER;
2299 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2300 struct sljit_jump *jump;
2301 #endif
2302
2303 #ifdef SUPPORT_UTF
2304 if (common->utf)
2305 {
2306 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2307 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2308 #if defined COMPILE_PCRE8
2309 /* This can be an extra read in some situations, but hopefully
2310 it is needed in most cases. */
2311 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2312 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2313 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2314 JUMPHERE(jump);
2315 #elif defined COMPILE_PCRE16
2316 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2317 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2318 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2319 JUMPHERE(jump);
2320 /* Skip low surrogate if necessary. */
2321 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2322 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2323 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2324 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2325 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2326 #elif defined COMPILE_PCRE32
2327 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2328 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2329 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2330 JUMPHERE(jump);
2331 #endif /* COMPILE_PCRE[8|16|32] */
2332 return;
2333 }
2334 #endif /* SUPPORT_UTF */
2335 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2336 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2337 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2338 /* The ctypes array contains only 256 values. */
2339 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2340 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2341 #endif
2342 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2343 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2344 JUMPHERE(jump);
2345 #endif
2346 }
2347
2348 static void skip_char_back(compiler_common *common)
2349 {
2350 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2351 DEFINE_COMPILER;
2352 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2353 #if defined COMPILE_PCRE8
2354 struct sljit_label *label;
2355
2356 if (common->utf)
2357 {
2358 label = LABEL();
2359 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2360 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2361 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2362 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2363 return;
2364 }
2365 #elif defined COMPILE_PCRE16
2366 if (common->utf)
2367 {
2368 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2369 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2370 /* Skip low surrogate if necessary. */
2371 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2372 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2373 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2374 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2375 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2376 return;
2377 }
2378 #endif /* COMPILE_PCRE[8|16] */
2379 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2380 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2381 }
2382
2383 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2384 {
2385 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2386 DEFINE_COMPILER;
2387
2388 if (nltype == NLTYPE_ANY)
2389 {
2390 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2391 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2392 }
2393 else if (nltype == NLTYPE_ANYCRLF)
2394 {
2395 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2396 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2397 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2398 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2399 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2400 }
2401 else
2402 {
2403 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2404 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2405 }
2406 }
2407
2408 #ifdef SUPPORT_UTF
2409
2410 #if defined COMPILE_PCRE8
2411 static void do_utfreadchar(compiler_common *common)
2412 {
2413 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2414 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2415 DEFINE_COMPILER;
2416 struct sljit_jump *jump;
2417
2418 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2419 /* Searching for the first zero. */
2420 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2421 jump = JUMP(SLJIT_C_NOT_ZERO);
2422 /* Two byte sequence. */
2423 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2424 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2425 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2426 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2427 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2428 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2429 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2430 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2431 JUMPHERE(jump);
2432
2433 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2434 jump = JUMP(SLJIT_C_NOT_ZERO);
2435 /* Three byte sequence. */
2436 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2437 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2438 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2439 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2440 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2441 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2442 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2443 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2444 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2445 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2446 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2447 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2448 JUMPHERE(jump);
2449
2450 /* Four byte sequence. */
2451 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2452 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2453 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2454 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2455 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2456 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2457 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2458 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2459 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2460 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2461 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2462 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2463 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2464 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2465 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2466 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2467 }
2468
2469 static void do_utfreadtype8(compiler_common *common)
2470 {
2471 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2472 of the character (>= 0xc0). Return value in TMP1. */
2473 DEFINE_COMPILER;
2474 struct sljit_jump *jump;
2475 struct sljit_jump *compare;
2476
2477 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2478
2479 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2480 jump = JUMP(SLJIT_C_NOT_ZERO);
2481 /* Two byte sequence. */
2482 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2483 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2484 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2485 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2486 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2487 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2488 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2489 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2490 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2491
2492 JUMPHERE(compare);
2493 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2494 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2495 JUMPHERE(jump);
2496
2497 /* We only have types for characters less than 256. */
2498 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2499 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2500 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2501 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2502 }
2503
2504 #elif defined COMPILE_PCRE16
2505
2506 static void do_utfreadchar(compiler_common *common)
2507 {
2508 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2509 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2510 DEFINE_COMPILER;
2511 struct sljit_jump *jump;
2512
2513 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2514 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2515 /* Do nothing, only return. */
2516 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2517
2518 JUMPHERE(jump);
2519 /* Combine two 16 bit characters. */
2520 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2521 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2522 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2523 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2524 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2525 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2526 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2527 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2528 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2529 }
2530
2531 #endif /* COMPILE_PCRE[8|16] */
2532
2533 #endif /* SUPPORT_UTF */
2534
2535 #ifdef SUPPORT_UCP
2536
2537 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2538 #define UCD_BLOCK_MASK 127
2539 #define UCD_BLOCK_SHIFT 7
2540
2541 static void do_getucd(compiler_common *common)
2542 {
2543 /* Search the UCD record for the character comes in TMP1.
2544 Returns chartype in TMP1 and UCD offset in TMP2. */
2545 DEFINE_COMPILER;
2546
2547 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2548
2549 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2550 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2551 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2552 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2553 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2554 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2555 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2556 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2557 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2558 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2559 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2560 }
2561 #endif
2562
2563 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2564 {
2565 DEFINE_COMPILER;
2566 struct sljit_label *mainloop;
2567 struct sljit_label *newlinelabel = NULL;
2568 struct sljit_jump *start;
2569 struct sljit_jump *end = NULL;
2570 struct sljit_jump *nl = NULL;
2571 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2572 struct sljit_jump *singlechar;
2573 #endif
2574 jump_list *newline = NULL;
2575 BOOL newlinecheck = FALSE;
2576 BOOL readuchar = FALSE;
2577
2578 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2579 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2580 newlinecheck = TRUE;
2581
2582 if (firstline)
2583 {
2584 /* Search for the end of the first line. */
2585 SLJIT_ASSERT(common->first_line_end != 0);
2586 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2587
2588 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2589 {
2590 mainloop = LABEL();
2591 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2592 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2593 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2594 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2595 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2596 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2597 JUMPHERE(end);
2598 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2599 }
2600 else
2601 {
2602 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2603 mainloop = LABEL();
2604 /* Continual stores does not cause data dependency. */
2605 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2606 read_char(common);
2607 check_newlinechar(common, common->nltype, &newline, TRUE);
2608 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2609 JUMPHERE(end);
2610 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2611 set_jumps(newline, LABEL());
2612 }
2613
2614 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2615 }
2616
2617 start = JUMP(SLJIT_JUMP);
2618
2619 if (newlinecheck)
2620 {
2621 newlinelabel = LABEL();
2622 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2623 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2624 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2625 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2626 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2627 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2628 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2629 #endif
2630 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2631 nl = JUMP(SLJIT_JUMP);
2632 }
2633
2634 mainloop = LABEL();
2635
2636 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2637 #ifdef SUPPORT_UTF
2638 if (common->utf) readuchar = TRUE;
2639 #endif
2640 if (newlinecheck) readuchar = TRUE;
2641
2642 if (readuchar)
2643 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2644
2645 if (newlinecheck)
2646 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2647
2648 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2649 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2650 #if defined COMPILE_PCRE8
2651 if (common->utf)
2652 {
2653 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2654 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2655 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2656 JUMPHERE(singlechar);
2657 }
2658 #elif defined COMPILE_PCRE16
2659 if (common->utf)
2660 {
2661 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2662 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2663 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2664 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2665 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2666 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2667 JUMPHERE(singlechar);
2668 }
2669 #endif /* COMPILE_PCRE[8|16] */
2670 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2671 JUMPHERE(start);
2672
2673 if (newlinecheck)
2674 {
2675 JUMPHERE(end);
2676 JUMPHERE(nl);
2677 }
2678
2679 return mainloop;
2680 }
2681
2682 #define MAX_N_CHARS 3
2683
2684 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2685 {
2686 DEFINE_COMPILER;
2687 struct sljit_label *start;
2688 struct sljit_jump *quit;
2689 pcre_uint32 chars[MAX_N_CHARS * 2];
2690 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2691 int location = 0;
2692 pcre_int32 len, c, bit, caseless;
2693 int must_stop;
2694
2695 /* We do not support alternatives now. */
2696 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2697 return FALSE;
2698
2699 while (TRUE)
2700 {
2701 caseless = 0;
2702 must_stop = 1;
2703 switch(*cc)
2704 {
2705 case OP_CHAR:
2706 must_stop = 0;
2707 cc++;
2708 break;
2709
2710 case OP_CHARI:
2711 caseless = 1;
2712 must_stop = 0;
2713 cc++;
2714 break;
2715
2716 case OP_SOD:
2717 case OP_SOM:
2718 case OP_SET_SOM:
2719 case OP_NOT_WORD_BOUNDARY:
2720 case OP_WORD_BOUNDARY:
2721 case OP_EODN:
2722 case OP_EOD:
2723 case OP_CIRC:
2724 case OP_CIRCM:
2725 case OP_DOLL:
2726 case OP_DOLLM:
2727 /* Zero width assertions. */
2728 cc++;
2729 continue;
2730
2731 case OP_PLUS:
2732 case OP_MINPLUS:
2733 case OP_POSPLUS:
2734 cc++;
2735 break;
2736
2737 case OP_EXACT:
2738 cc += 1 + IMM2_SIZE;
2739 break;
2740
2741 case OP_PLUSI:
2742 case OP_MINPLUSI:
2743 case OP_POSPLUSI:
2744 caseless = 1;
2745 cc++;
2746 break;
2747
2748 case OP_EXACTI:
2749 caseless = 1;
2750 cc += 1 + IMM2_SIZE;
2751 break;
2752
2753 default:
2754 must_stop = 2;
2755 break;
2756 }
2757
2758 if (must_stop == 2)
2759 break;
2760
2761 len = 1;
2762 #ifdef SUPPORT_UTF
2763 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2764 #endif
2765
2766 if (caseless && char_has_othercase(common, cc))
2767 {
2768 caseless = char_get_othercase_bit(common, cc);
2769 if (caseless == 0)
2770 return FALSE;
2771 #ifdef COMPILE_PCRE8
2772 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2773 #else
2774 if ((caseless & 0x100) != 0)
2775 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2776 else
2777 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2778 #endif
2779 }
2780 else
2781 caseless = 0;
2782
2783 while (len > 0 && location < MAX_N_CHARS * 2)
2784 {
2785 c = *cc;
2786 bit = 0;
2787 if (len == (caseless & 0xff))
2788 {
2789 bit = caseless >> 8;
2790 c |= bit;
2791 }
2792
2793 chars[location] = c;
2794 chars[location + 1] = bit;
2795
2796 len--;
2797 location += 2;
2798 cc++;
2799 }
2800
2801 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
2802 break;
2803 }
2804
2805 /* At least two characters are required. */
2806 if (location < 2 * 2)
2807 return FALSE;
2808
2809 if (firstline)
2810 {
2811 SLJIT_ASSERT(common->first_line_end != 0);
2812 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2813 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2814 }
2815 else
2816 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2817
2818 start = LABEL();
2819 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2820
2821 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2822 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2823 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2824 if (chars[1] != 0)
2825 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
2826 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
2827 if (location > 2 * 2)
2828 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2829 if (chars[3] != 0)
2830 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
2831 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
2832 if (location > 2 * 2)
2833 {
2834 if (chars[5] != 0)
2835 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
2836 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
2837 }
2838 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2839
2840 JUMPHERE(quit);
2841
2842 if (firstline)
2843 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2844 else
2845 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2846 return TRUE;
2847 }
2848
2849 #undef MAX_N_CHARS
2850
2851 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2852 {
2853 DEFINE_COMPILER;
2854 struct sljit_label *start;
2855 struct sljit_jump *quit;
2856 struct sljit_jump *found;
2857 pcre_uchar oc, bit;
2858
2859 if (firstline)
2860 {
2861 SLJIT_ASSERT(common->first_line_end != 0);
2862 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2863 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2864 }
2865
2866 start = LABEL();
2867 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2868 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2869
2870 oc = first_char;
2871 if (caseless)
2872 {
2873 oc = TABLE_GET(first_char, common->fcc, first_char);
2874 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2875 if (first_char > 127 && common->utf)
2876 oc = UCD_OTHERCASE(first_char);
2877 #endif
2878 }
2879 if (first_char == oc)
2880 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2881 else
2882 {
2883 bit = first_char ^ oc;
2884 if (is_powerof2(bit))
2885 {
2886 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2887 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2888 }
2889 else
2890 {
2891 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2892 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2893 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2894 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2895 found = JUMP(SLJIT_C_NOT_ZERO);
2896 }
2897 }
2898
2899 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2900 JUMPTO(SLJIT_JUMP, start);
2901 JUMPHERE(found);
2902 JUMPHERE(quit);
2903
2904 if (firstline)
2905 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2906 }
2907
2908 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2909 {
2910 DEFINE_COMPILER;
2911 struct sljit_label *loop;
2912 struct sljit_jump *lastchar;
2913 struct sljit_jump *firstchar;
2914 struct sljit_jump *quit;
2915 struct sljit_jump *foundcr = NULL;
2916 struct sljit_jump *notfoundnl;
2917 jump_list *newline = NULL;
2918
2919 if (firstline)
2920 {
2921 SLJIT_ASSERT(common->first_line_end != 0);
2922 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2923 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2924 }
2925
2926 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2927 {
2928 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2929 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2930 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2931 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2932 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2933
2934 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2935 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2936 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
2937 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2938 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
2939 #endif
2940 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2941
2942 loop = LABEL();
2943 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2944 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2945 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2946 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2947 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2948 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2949
2950 JUMPHERE(quit);
2951 JUMPHERE(firstchar);
2952 JUMPHERE(lastchar);
2953
2954 if (firstline)
2955 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2956 return;
2957 }
2958
2959 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2960 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2961 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2962 skip_char_back(common);
2963
2964 loop = LABEL();
2965 read_char(common);
2966 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2967 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2968 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2969 check_newlinechar(common, common->nltype, &newline, FALSE);
2970 set_jumps(newline, loop);
2971
2972 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2973 {
2974 quit = JUMP(SLJIT_JUMP);
2975 JUMPHERE(foundcr);
2976 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2977 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2978 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2979 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2980 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2981 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2982 #endif
2983 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2984 JUMPHERE(notfoundnl);
2985 JUMPHERE(quit);
2986 }
2987 JUMPHERE(lastchar);
2988 JUMPHERE(firstchar);
2989
2990 if (firstline)
2991 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2992 }
2993
2994 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
2995
2996 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2997 {
2998 DEFINE_COMPILER;
2999 struct sljit_label *start;
3000 struct sljit_jump *quit;
3001 struct sljit_jump *found = NULL;
3002 jump_list *matches = NULL;
3003 pcre_uint8 inverted_start_bits[32];
3004 int i;
3005 #ifndef COMPILE_PCRE8
3006 struct sljit_jump *jump;
3007 #endif
3008
3009 for (i = 0; i < 32; ++i)
3010 inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3011
3012 if (firstline)
3013 {
3014 SLJIT_ASSERT(common->first_line_end != 0);
3015 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3016 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3017 }
3018
3019 start = LABEL();
3020 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3021 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3022 #ifdef SUPPORT_UTF
3023 if (common->utf)
3024 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3025 #endif
3026
3027 if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3028 {
3029 #ifndef COMPILE_PCRE8
3030 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3031 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3032 JUMPHERE(jump);
3033 #endif
3034 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3035 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3036 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3037 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3038 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3039 found = JUMP(SLJIT_C_NOT_ZERO);
3040 }
3041
3042 #ifdef SUPPORT_UTF
3043 if (common->utf)
3044 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3045 #endif
3046 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3047 #ifdef SUPPORT_UTF
3048 #if defined COMPILE_PCRE8
3049 if (common->utf)
3050 {
3051 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3052 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3053 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3054 }
3055 #elif defined COMPILE_PCRE16
3056 if (common->utf)
3057 {
3058 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3059 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3060 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3061 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3062 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3063 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3064 }
3065 #endif /* COMPILE_PCRE[8|16] */
3066 #endif /* SUPPORT_UTF */
3067 JUMPTO(SLJIT_JUMP, start);
3068 if (found != NULL)
3069 JUMPHERE(found);
3070 if (matches != NULL)
3071 set_jumps(matches, LABEL());
3072 JUMPHERE(quit);
3073
3074 if (firstline)
3075 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3076 }
3077
3078 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3079 {
3080 DEFINE_COMPILER;
3081 struct sljit_label *loop;
3082 struct sljit_jump *toolong;
3083 struct sljit_jump *alreadyfound;
3084 struct sljit_jump *found;
3085 struct sljit_jump *foundoc = NULL;
3086 struct sljit_jump *notfound;
3087 pcre_uint32 oc, bit;
3088
3089 SLJIT_ASSERT(common->req_char_ptr != 0);
3090 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3091 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3092 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3093 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3094
3095 if (has_firstchar)
3096 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3097 else
3098 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3099
3100 loop = LABEL();
3101 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3102
3103 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3104 oc = req_char;
3105 if (caseless)
3106 {
3107 oc = TABLE_GET(req_char, common->fcc, req_char);
3108 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3109 if (req_char > 127 && common->utf)
3110 oc = UCD_OTHERCASE(req_char);
3111 #endif
3112 }
3113 if (req_char == oc)
3114 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3115 else
3116 {
3117 bit = req_char ^ oc;
3118 if (is_powerof2(bit))
3119 {
3120 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3121 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3122 }
3123 else
3124 {
3125 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3126 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3127 }
3128 }
3129 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3130 JUMPTO(SLJIT_JUMP, loop);
3131
3132 JUMPHERE(found);
3133 if (foundoc)
3134 JUMPHERE(foundoc);
3135 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3136 JUMPHERE(alreadyfound);
3137 JUMPHERE(toolong);
3138 return notfound;
3139 }
3140
/* Emits the shared frame-reverting routine. It is entered with a fast call
and keeps its return address in RETURN_ADDR.

Starting at STACK_TOP the generated code walks a list of machine words through
TMP1 and dispatches on the sign of the first word of each record:
  > 0 : the word is added to TMP3 (set up by GET_LOCAL_BASE, presumably the
        address of the local area); the next two words are copied to that
        address and to the word after it, and TMP1 advances by three words.
  = 0 : end of the list, the routine returns.
  < 0 : the word is negated and added to TMP3; the next single word is copied
        to that address, and TMP1 advances by two words.
TMP1, TMP2 and TMP3 are overwritten. */
3141 static void do_revertframes(compiler_common *common)
3142 {
3143 DEFINE_COMPILER;
3144 struct sljit_jump *jump;
3145 struct sljit_label *mainloop;
3146
3147 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3148 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3149 GET_LOCAL_BASE(TMP3, 0, 0);
3150
3151 /* Drop frames until we reach STACK_TOP. */
3152 mainloop = LABEL();
3153 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Signed compare of the record header against zero; the flags are reused by
the second conditional jump below. */
3154 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3155 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3156
/* Positive header: restore two consecutive words. */
3157 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3158 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3159 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3160 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3161 JUMPTO(SLJIT_JUMP, mainloop);
3162
3163 JUMPHERE(jump);
3164 jump = JUMP(SLJIT_C_SIG_LESS);
3165 /* End of dropping frames. */
3166 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3167
/* Negative header: restore a single word at the negated offset. */
3168 JUMPHERE(jump);
3169 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3170 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3171 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3172 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3173 JUMPTO(SLJIT_JUMP, mainloop);
3174 }
3175
/* Emits the shared word-boundary test routine (entered with a fast call; the
return address is kept in the LOCALS0 slot).

The generated code computes two 0/1 "is a word character" values:
  - for the character before STR_PTR, stored in the LOCALS1 slot (left at 0
    when STR_PTR is at or before the subject start);
  - for the character at STR_PTR, kept in TMP2 (left at 0 when check_str_end
    takes one of its jumps).
The final XOR of the two sets the E flag, so the flag tells whether the two
values differ. The caller presumably branches on that flag; this is not
visible here.

With common->use_ucp the value is 1 for an underscore, or when the value
produced by the getucd call lies in ucp_Ll..ucp_Lu or ucp_Nd..ucp_No.
Otherwise bit 4 (ctype_word) of the ctypes table entry is used, and the table
lookup is skipped for characters above 255. */
3176 static void check_wordboundary(compiler_common *common)
3177 {
3178 DEFINE_COMPILER;
3179 struct sljit_jump *skipread;
3180 jump_list *skipread_list = NULL;
3181 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3182 struct sljit_jump *jump;
3183 #endif
3184
/* The shift by 4 used below relies on this value. */
3185 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3186
3187 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3188 /* Get type of the previous char, and put it to LOCALS1. */
3189 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3190 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3191 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
/* No previous character at the start of the subject: LOCALS1 stays 0. */
3192 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3193 skip_char_back(common);
3194 check_start_used_ptr(common);
3195 read_char(common);
3196
3197 /* Testing char type. */
3198 #ifdef SUPPORT_UCP
3199 if (common->use_ucp)
3200 {
/* TMP2 defaults to 1 so that the underscore shortcut needs no further work. */
3201 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3202 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3203 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
/* Two unsigned range checks on the rebased value: Ll..Lu, then Nd..No. */
3204 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3205 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3206 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3207 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3208 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3209 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3210 JUMPHERE(jump);
3211 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3212 }
3213 else
3214 #endif
3215 {
3216 #ifndef COMPILE_PCRE8
3217 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3218 #elif defined SUPPORT_UTF
3219 /* Here LOCALS1 has already been zeroed. */
3220 jump = NULL;
3221 if (common->utf)
3222 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3223 #endif /* COMPILE_PCRE8 */
/* Table lookup: extract the ctype_word bit as a 0/1 value. */
3224 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3225 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3226 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3227 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3228 #ifndef COMPILE_PCRE8
3229 JUMPHERE(jump);
3230 #elif defined SUPPORT_UTF
3231 if (jump != NULL)
3232 JUMPHERE(jump);
3233 #endif /* COMPILE_PCRE8 */
3234 }
3235 JUMPHERE(skipread);
3236
/* Now the character at STR_PTR; TMP2 stays 0 if check_str_end jumps away. */
3237 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3238 check_str_end(common, &skipread_list);
3239 peek_char(common);
3240
3241 /* Testing char type. This is a code duplication. */
3242 #ifdef SUPPORT_UCP
3243 if (common->use_ucp)
3244 {
3245 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3246 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3247 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3248 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3249 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3250 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3251 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3252 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3253 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3254 JUMPHERE(jump);
3255 }
3256 else
3257 #endif
3258 {
3259 #ifndef COMPILE_PCRE8
3260 /* TMP2 may be destroyed by peek_char. */
3261 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3262 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3263 #elif defined SUPPORT_UTF
3264 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3265 jump = NULL;
3266 if (common->utf)
3267 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3268 #endif
3269 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3270 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3271 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3272 #ifndef COMPILE_PCRE8
3273 JUMPHERE(jump);
3274 #elif defined SUPPORT_UTF
3275 if (jump != NULL)
3276 JUMPHERE(jump);
3277 #endif /* COMPILE_PCRE8 */
3278 }
3279 set_jumps(skipread_list, LABEL());
3280
/* E flag := (current value XOR previous value); only the flag is produced. */
3281 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3282 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3283 }
3284
3285 /*
3286 range format:
3287
3288 ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3289 ranges[1] = first bit (0 or 1)
3290 ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)
3291 */
3292
3293 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3294 {
3295 DEFINE_COMPILER;
3296 struct sljit_jump *jump;
3297
3298 if (ranges[0] < 0)
3299 return FALSE;
3300
3301 switch(ranges[0])
3302 {
3303 case 1:
3304 if (readch)
3305 read_char(common);
3306 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3307 return TRUE;
3308
3309 case 2:
3310 if (readch)
3311 read_char(common);
3312 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3313 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3314 return TRUE;
3315
3316 case 4:
3317 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3318 {
3319 if (readch)
3320 read_char(common);
3321 if (ranges[1] != 0)
3322 {
3323 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3324 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3325 }
3326 else
3327 {
3328 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3329 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3330 JUMPHERE(jump);
3331 }
3332 return TRUE;
3333 }
3334 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3335 {
3336 if (readch)
3337 read_char(common);
3338 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3339 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3340 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3341 return TRUE;
3342 }
3343 return FALSE;
3344
3345 default:
3346 return FALSE;
3347 }
3348 }
3349
3350 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3351 {
3352 int i, bit, length;
3353 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3354
3355 bit = ctypes[0] & flag;
3356 ranges[0] = -1;
3357 ranges[1] = bit != 0 ? 1 : 0;
3358 length = 0;
3359
3360 for (i = 1; i < 256; i++)
3361 if ((ctypes[i] & flag) != bit)
3362 {
3363 if (length >= MAX_RANGE_SIZE)
3364 return;
3365 ranges[2 + length] = i;
3366 length++;
3367 bit ^= flag;
3368 }
3369
3370 if (bit != 0)
3371 {
3372 if (length >= MAX_RANGE_SIZE)
3373 return;
3374 ranges[2 + length] = 256;
3375 length++;
3376 }
3377 ranges[0] = length;
3378 }
3379
3380 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3381 {
3382 int ranges[2 + MAX_RANGE_SIZE];
3383 pcre_uint8 bit, cbit, all;
3384 int i, byte, length = 0;
3385
3386 bit = bits[0] & 0x1;
3387 ranges[1] = bit;
3388 /* Can be 0 or 255. */
3389 all = -bit;
3390
3391 for (i = 0; i < 256; )
3392 {
3393 byte = i >> 3;
3394 if ((i & 0x7) == 0 && bits[byte] == all)
3395 i += 8;
3396 else
3397 {
3398 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3399 if (cbit != bit)
3400 {
3401 if (length >= MAX_RANGE_SIZE)
3402 return FALSE;
3403 ranges[2 + length] = i;
3404 length++;
3405 bit = cbit;
3406 all = -cbit;
3407 }
3408 i++;
3409 }
3410 }
3411
3412 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3413 {
3414 if (length >= MAX_RANGE_SIZE)
3415 return FALSE;
3416 ranges[2 + length] = 256;
3417 length++;
3418 }
3419 ranges[0] = length;
3420
3421 return check_ranges(common, ranges, backtracks, FALSE);
3422 }
3423
/* Emits the shared "any newline" test routine (entered with a fast call; the
return address is kept in RETURN_ADDR).

Accepted values: 0x0a..0x0d and 0x85. In 16/32 bit builds, and in 8 bit builds
when common->utf is set, 0x2028 and 0x2029 are accepted as well. The 0/1
result is accumulated in TMP2 and the last OP_FLAGS also sets the E flag from
it. TMP1 is modified as well (rebased by 0x0a, and possibly OR-ed with 1). */
3424 static void check_anynewline(compiler_common *common)
3425 {
3426 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3427 DEFINE_COMPILER;
3428
3429 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3430
/* Rebase to 0x0a so that 0x0a..0x0d becomes one unsigned range check. */
3431 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3432 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3433 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3434 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3435 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3436 #ifdef COMPILE_PCRE8
3437 if (common->utf)
3438 {
3439 #endif
3440 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Setting bit 0 maps the rebased 0x2028 onto the rebased 0x2029, so one
compare covers both. */
3441 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3442 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3443 #ifdef COMPILE_PCRE8
3444 }
3445 #endif
3446 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Fold in the result of the last pending compare and set the E flag. */
3447 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3448 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3449 }
3450
/* Emits the shared horizontal white space test routine (entered with a fast
call; the return address is kept in RETURN_ADDR).

Accepted values: 0x09, 0x20 and 0xa0. In 16/32 bit builds, and in 8 bit builds
when common->utf is set, also 0x1680, 0x180e, 0x2000..0x200a, 0x202f, 0x205f
and 0x3000. The 0/1 result is accumulated in TMP2 and the last OP_FLAGS also
sets the E flag from it. In the wide path TMP1 is rebased by 0x2000. */
3451 static void check_hspace(compiler_common *common)
3452 {
3453 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
3454 DEFINE_COMPILER;
3455
3456 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3457
3458 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3459 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3460 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3461 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3462 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3463 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3464 #ifdef COMPILE_PCRE8
3465 if (common->utf)
3466 {
3467 #endif
3468 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3469 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3470 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3471 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3472 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* From here on TMP1 is rebased to 0x2000: one unsigned range check for
0x2000..0x200a, then single compares relative to the new base. */
3473 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3474 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3475 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3476 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3477 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3478 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3479 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3480 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3481 #ifdef COMPILE_PCRE8
3482 }
3483 #endif
3484 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Fold in the result of the last pending compare and set the E flag. */
3485 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3486
3487 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3488 }
3489
/* Emits the shared vertical white space test routine (entered with a fast
call; the return address is kept in RETURN_ADDR).

Accepted values: 0x0a..0x0d and 0x85. In 16/32 bit builds, and in 8 bit builds
when common->utf is set, 0x2028 and 0x2029 are accepted as well. The 0/1
result is accumulated in TMP2 and the last OP_FLAGS also sets the E flag from
it. TMP1 is modified as well (rebased by 0x0a, and possibly OR-ed with 1). */
3490 static void check_vspace(compiler_common *common)
3491 {
3492 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
3493 DEFINE_COMPILER;
3494
3495 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3496
/* Rebase to 0x0a so that 0x0a..0x0d becomes one unsigned range check. */
3497 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3498 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3499 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3500 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3501 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3502 #ifdef COMPILE_PCRE8
3503 if (common->utf)
3504 {
3505 #endif
/* NOTE(review): unlike check_anynewline this intermediate OP_FLAGS also
requests SLJIT_SET_E; the flags are overwritten by the compare two lines
below, so the request looks redundant - confirm before changing. */
3506 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Setting bit 0 maps the rebased 0x2028 onto the rebased 0x2029, so one
compare covers both. */
3507 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3508 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3509 #ifdef COMPILE_PCRE8
3510 }
3511 #endif
3512 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Fold in the result of the last pending compare and set the E flag. */
3513 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3514
3515 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3516 }
3517
/* The compare loops below borrow STR_END and STACK_TOP as scratch registers
for the two characters; their values are saved on entry and restored on exit. */
3518 #define CHAR1 STR_END
3519 #define CHAR2 STACK_TOP
3520
/* Emits the shared caseful compare routine (entered with a fast call; the
return address is kept in RETURN_ADDR).

Inputs of the generated code: TMP1 points to the first sequence, TMP2 is its
length in bytes (decremented by IN_UCHARS(1) per character), and STR_PTR is
first moved back by TMP2 to reach the second sequence. The loop stops at the
first differing character, or when TMP2 reaches zero, so TMP2 is zero on exit
only if every character was equal. STR_END and STACK_TOP are preserved via
TMP3 and the LOCALS0 slot. NOTE(review): how callers consume the result (TMP2
or the flags) is not visible here. */
3521 static void do_casefulcmp(compiler_common *common)
3522 {
3523 DEFINE_COMPILER;
3524 struct sljit_jump *jump;
3525 struct sljit_label *label;
3526
3527 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3528 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3529 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3530 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are moved one character back because the loads below use
MOVU_UCHAR with an offset of one character (presumably a load that also
updates the base register - confirm in the MOVU_UCHAR definition). */
3531 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3532 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3533
3534 label = LABEL();
3535 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3536 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3537 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3538 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3539 JUMPTO(SLJIT_C_NOT_ZERO, label);
3540
3541 JUMPHERE(jump);
/* Undo the initial one character bias of STR_PTR and restore the borrowed
registers. */
3542 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3543 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3544 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3545 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3546 }
3547
/* STACK_LIMIT is borrowed to hold the address of the lower casing table
(common->lcc) during the caseless compare; it is saved in TMP3 and restored. */
3548 #define LCC_TABLE STACK_LIMIT
3549
/* Emits the shared caseless compare routine (entered with a fast call; the
return address is kept in RETURN_ADDR).

Same register protocol as do_casefulcmp: TMP1 points to the first sequence,
TMP2 is its length in bytes, STR_PTR is moved back by TMP2 first. Each
character is mapped through the common->lcc table before the comparison; in
16/32 bit builds characters above 255 skip the table lookup and are compared
unchanged. TMP2 is zero on exit only if every character compared equal.
STR_END, STACK_TOP and STACK_LIMIT are preserved via the LOCALS0 and LOCALS1
slots and TMP3. */
3550 static void do_caselesscmp(compiler_common *common)
3551 {
3552 DEFINE_COMPILER;
3553 struct sljit_jump *jump;
3554 struct sljit_label *label;
3555
3556 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3557 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3558
3559 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3560 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3561 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3562 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* One character bias for the MOVU_UCHAR loads in the loop. */
3563 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3564 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3565
3566 label = LABEL();
3567 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3568 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3569 #ifndef COMPILE_PCRE8
/* The table is only indexed with values up to 255. */
3570 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3571 #endif
3572 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3573 #ifndef COMPILE_PCRE8
3574 JUMPHERE(jump);
3575 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3576 #endif
3577 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3578 #ifndef COMPILE_PCRE8
3579 JUMPHERE(jump);
3580 #endif
3581 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3582 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3583 JUMPTO(SLJIT_C_NOT_ZERO, label);
3584
3585 JUMPHERE(jump);
/* Undo the initial bias of STR_PTR and restore the borrowed registers. */
3586 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3587 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3588 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3589 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3590 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3591 }
3592
3593 #undef LCC_TABLE
3594 #undef CHAR1
3595 #undef CHAR2
3596
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of two character sequences using the Unicode tables.
This is an ordinary C function (not generated code) because the table walk
would be awkward to express at JIT level.

The first sequence is [src1, end1); the second one starts at args->uchar_ptr
and must not run past args->end. Two characters match when they are equal,
when the first equals the second plus its other_case delta, or when the first
is a member of the caseless set of the second.

Returns:
  NULL             - a pair of characters did not match;
  (pcre_uchar*)1   - the second sequence ended before the first one did;
  otherwise        - the position in the second sequence after the last
                     compared character. */
static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
const pcre_uchar *src2 = args->uchar_ptr;
const pcre_uchar *end2 = args->end;
const ucd_record *record;
const pcre_uint32 *set;
pcre_uint32 c1, c2;

for (; src1 < end1; )
  {
  if (src2 >= end2)
    return (pcre_uchar*)1;

  GETCHARINC(c1, src1);
  GETCHARINC(c2, src2);
  record = GET_UCD(c2);

  if (c1 == c2 || c1 == c2 + record->other_case)
    continue;

  /* Walk the caseless set of c2. The loop relies on the entries being in
  ascending order: the first entry greater than c1 proves that c1 is not
  a member. */
  set = PRIV(ucd_caseless_sets) + record->caseset;
  while (c1 != *set)
    {
    if (c1 < *set)
      return NULL;
    set++;
    }
  }

return src2;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
3629
/* Emits the comparison of the subject against one pattern character at cc
(all of its code units when common->utf is set and the character has extra
units). Mismatch jumps are appended to backtracks. Returns cc advanced past
the code units that were consumed.

The function is called repeatedly with the same context, which carries state
from one call to the next:
  context->length    - remaining length, reduced by IN_UCHARS(1) per code
                       unit; subject data is addressed as STR_PTR - length;
  context->sourcereg - register that receives the next load (-1 before the
                       first call); TMP1 and TMP2 are used alternately so the
                       next load is issued before the previous compare;
  context->ucharptr, context->c, context->oc - code units (and their case
                       bits) collected for one combined multi-unit compare
                       when unaligned reads are available.

Caseless matching is only applied when char_has_othercase() is true. The
differing bit is then OR-ed into the subject data and into the expected
value. The value from char_get_othercase_bit() packs the bit mask in its low
bits and the index of the affected code unit in its upper bits. */
3630 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3631 compare_context* context, jump_list **backtracks)
3632 {
3633 DEFINE_COMPILER;
3634 unsigned int othercasebit = 0;
3635 pcre_uchar *othercasechar = NULL;
3636 #ifdef SUPPORT_UTF
3637 int utflength;
3638 #endif
3639
3640 if (caseless && char_has_othercase(common, cc))
3641 {
3642 othercasebit = char_get_othercase_bit(common, cc);
3643 SLJIT_ASSERT(othercasebit);
3644 /* Extracting bit difference info. */
3645 #if defined COMPILE_PCRE8
3646 othercasechar = cc + (othercasebit >> 8);
3647 othercasebit &= 0xff;
3648 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3649 /* Note that this code only handles characters in the BMP. If there
3650 ever are characters outside the BMP whose othercase differs in only one
3651 bit from itself (there currently are none), this code will need to be
3652 revised for COMPILE_PCRE32. */
3653 othercasechar = cc + (othercasebit >> 9);
/* Bit 0x100 selects the high byte of the 16 bit unit. */
3654 if ((othercasebit & 0x100) != 0)
3655 othercasebit = (othercasebit & 0xff) << 8;
3656 else
3657 othercasebit &= 0xff;
3658 #endif /* COMPILE_PCRE[8|16|32] */
3659 }
3660
/* First call for this context: issue the initial load into TMP1, using the
widest access the remaining length allows, and direct the next load to TMP2. */
3661 if (context->sourcereg == -1)
3662 {
3663 #if defined COMPILE_PCRE8
3664 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3665 if (context->length >= 4)
3666 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3667 else if (context->length >= 2)
3668 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3669 else
3670 #endif
3671 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3672 #elif defined COMPILE_PCRE16
3673 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3674 if (context->length >= 4)
3675 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3676 else
3677 #endif
3678 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3679 #elif defined COMPILE_PCRE32
3680 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3681 #endif /* COMPILE_PCRE[8|16|32] */
3682 context->sourcereg = TMP2;
3683 }
3684
3685 #ifdef SUPPORT_UTF
3686 utflength = 1;
3687 if (common->utf && HAS_EXTRALEN(*cc))
3688 utflength += GET_EXTRALEN(*cc);
3689
3690 do
3691 {
3692 #endif
3693
3694 context->length -= IN_UCHARS(1);
3695 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3696
3697 /* Unaligned read is supported. */
/* Collect the expected unit (with the case bit already set) and the mask. */
3698 if (othercasebit != 0 && othercasechar == cc)
3699 {
3700 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3701 context->oc.asuchars[context->ucharptr] = othercasebit;
3702 }
3703 else
3704 {
3705 context->c.asuchars[context->ucharptr] = *cc;
3706 context->oc.asuchars[context->ucharptr] = 0;
3707 }
3708 context->ucharptr++;
3709
/* Flush once a full 4 byte group is collected, when nothing remains, or (8 bit
only) when two units are collected and a single one remains. */
3710 #if defined COMPILE_PCRE8
3711 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3712 #else
3713 if (context->ucharptr >= 2 || context->length == 0)
3714 #endif
3715 {
/* Issue the load for the following group before comparing the current one. */
3716 if (context->length >= 4)
3717 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3718 else if (context->length >= 2)
3719 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3720 #if defined COMPILE_PCRE8
3721 else if (context->length >= 1)
3722 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3723 #endif /* COMPILE_PCRE8 */
/* Switch to the register that holds the previously loaded data. */
3724 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3725
3726 switch(context->ucharptr)
3727 {
3728 case 4 / sizeof(pcre_uchar):
3729 if (context->oc.asint != 0)
3730 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3731 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3732 break;
3733
3734 case 2 / sizeof(pcre_uchar):
3735 if (context->oc.asushort != 0)
3736 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3737 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3738 break;
3739
3740 #ifdef COMPILE_PCRE8
3741 case 1:
3742 if (context->oc.asbyte != 0)
3743 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3744 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3745 break;
3746 #endif
3747
3748 default:
3749 SLJIT_ASSERT_STOP();
3750 break;
3751 }
3752 context->ucharptr = 0;
3753 }
3754
3755 #else
3756
3757 /* Unaligned read is unsupported or in 32 bit mode. */
/* One code unit at a time: load the next unit, then compare the previous one. */
3758 if (context->length >= 1)
3759 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3760
3761 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3762
3763 if (othercasebit != 0 && othercasechar == cc)
3764 {
3765 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3766 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3767 }
3768 else
3769 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3770
3771 #endif
3772
3773 cc++;
3774 #ifdef SUPPORT_UTF
3775 utflength--;
3776 }
3777 while (utflength > 0);
3778 #endif
3779
3780 return cc;
3781 }
3782
3783 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3784
/* Rebases the value held in typereg: after the macro, typereg contains the
original value minus (value), and the local variable typeoffset records the
current bias. Emits a single SUB or ADD of the difference, or nothing when the
bias is unchanged. Expects the locals typereg and typeoffset in scope. The
expansion is several statements that already end with a semicolon, so it must
not be used as the unbraced body of an if/else. */
3785 #define SET_TYPE_OFFSET(value) \
3786 if ((value) != typeoffset) \
3787 { \
3788 if ((value) > typeoffset) \
3789 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
3790 else \
3791 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
3792 } \
3793 typeoffset = (value);
3794
/* Rebases the character held in TMP1: after the macro, TMP1 contains the
original character minus (value), and the local variable charoffset records
the current bias. Emits a single SUB or ADD of the difference, or nothing when
the bias is unchanged. Expects the local charoffset in scope. The expansion is
several statements that already end with a semicolon, so it must not be used
as the unbraced body of an if/else. */
3795 #define SET_CHAR_OFFSET(value) \
3796 if ((value) != charoffset) \
3797 { \
3798 if ((value) > charoffset) \
3799 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
3800 else \
3801 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
3802 } \
3803 charoffset = (value);
3804
3805 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3806 {
3807 DEFINE_COMPILER;
3808 jump_list *found = NULL;
3809 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3810 pcre_int32 c, charoffset;
3811 const pcre_uint32 *other_cases;
3812 struct sljit_jump *jump = NULL;
3813 pcre_uchar *ccbegin;
3814 int compares, invertcmp, numberofcmps;
3815 #ifdef SUPPORT_UCP
3816 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3817 BOOL charsaved = FALSE;
3818 int typereg = TMP1, scriptreg = TMP1;
3819 pcre_int32 typeoffset;
3820 #endif
3821
3822 /* Although SUPPORT_UTF must be defined, we are
3823 not necessarily in UTF mode, even in 8 bit mode. */
3824 detect_partial_match(common, backtracks);
3825 read_char(common);
3826
3827 if ((*cc++ & XCL_MAP) != 0)
3828 {
3829 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3830 #ifndef COMPILE_PCRE8
3831 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3832 #elif defined SUPPORT_UTF
3833 if (common->utf)
3834 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3835 #endif
3836
3837 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3838 {
3839 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3840 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3841 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
3842 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3843 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3844 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3845 }
3846
3847 #ifndef COMPILE_PCRE8
3848 JUMPHERE(jump);
3849 #elif defined SUPPORT_UTF
3850 if (common->utf)
3851 JUMPHERE(jump);
3852 #endif
3853 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3854 #ifdef SUPPORT_UCP
3855 charsaved = TRUE;
3856 #endif
3857 cc += 32 / sizeof(pcre_uchar);
3858 }
3859
3860 /* Scanning the necessary info. */
3861 ccbegin = cc;
3862 compares = 0;
3863 while (*cc != XCL_END)
3864 {
3865 compares++;
3866 if (*cc == XCL_SINGLE)
3867 {
3868 cc += 2;
3869 #ifdef SUPPORT_UTF
3870 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3871 #endif
3872 #ifdef SUPPORT_UCP
3873 needschar = TRUE;
3874 #endif
3875 }
3876 else if (*cc == XCL_RANGE)
3877 {
3878 cc += 2;
3879 #ifdef SUPPORT_UTF
3880 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3881 #endif
3882 cc++;
3883 #ifdef SUPPORT_UTF
3884 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3885 #endif
3886 #ifdef SUPPORT_UCP
3887 needschar = TRUE;
3888 #endif
3889 }
3890 #ifdef SUPPORT_UCP
3891 else
3892 {
3893 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3894 cc++;
3895 switch(*cc)
3896 {
3897 case PT_ANY:
3898 break;
3899
3900 case PT_LAMP:
3901 case PT_GC:
3902 case PT_PC:
3903 case PT_ALNUM:
3904 needstype = TRUE;
3905 break;
3906
3907 case PT_SC:
3908 needsscript = TRUE;
3909 break;
3910
3911 case PT_SPACE:
3912 case PT_PXSPACE:
3913 case PT_WORD:
3914 needstype = TRUE;
3915 needschar = TRUE;
3916 break;
3917
3918 case PT_CLIST:
3919 case PT_UCNC:
3920 needschar = TRUE;
3921 break;
3922
3923 default:
3924 SLJIT_ASSERT_STOP();
3925 break;
3926 }
3927 cc += 2;
3928 }
3929 #endif
3930 }
3931
3932 #ifdef SUPPORT_UCP
3933 /* Simple register allocation. TMP1 is preferred if possible. */
3934 if (needstype || needsscript)
3935 {
3936 if (needschar && !charsaved)
3937 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3938 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3939 if (needschar)
3940 {
3941 if (needstype)
3942 {
3943 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3944 typereg = RETURN_ADDR;
3945 }
3946
3947 if (needsscript)
3948 scriptreg = TMP3;
3949 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3950 }
3951 else if (needstype && needsscript)
3952 scriptreg = TMP3;
3953 /* In all other cases only one of them was specified, and that one can go to TMP1. */
3954
3955 if (needsscript)
3956 {
3957 if (scriptreg == TMP1)
3958 {
3959 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3960 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3961 }
3962 else
3963 {
3964 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3965 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3966 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3967 }
3968 }
3969 }
3970 #endif
3971
3972 /* Generating code. */
3973 cc = ccbegin;
3974 charoffset = 0;
3975 numberofcmps = 0;
3976 #ifdef SUPPORT_UCP
3977 typeoffset = 0;
3978 #endif
3979
3980 while (*cc != XCL_END)
3981 {
3982 compares--;
3983 invertcmp = (compares == 0 && list != backtracks);
3984 jump = NULL;
3985
3986 if (*cc == XCL_SINGLE)
3987 {
3988 cc ++;
3989 #ifdef SUPPORT_UTF
3990 if (common->utf)
3991 {
3992 GETCHARINC(c, cc);
3993 }
3994 else
3995 #endif
3996 c = *cc++;
3997
3998 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3999 {
4000 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4001 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4002 numberofcmps++;
4003 }
4004 else if (numberofcmps > 0)
4005 {
4006 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4007 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4008 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4009 numberofcmps = 0;
4010 }
4011 else
4012 {
4013 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4014 numberofcmps = 0;
4015 }
4016 }
4017 else if (*cc == XCL_RANGE)
4018 {
4019 cc ++;
4020 #ifdef SUPPORT_UTF
4021 if (common->utf)
4022 {
4023 GETCHARINC(c, cc);
4024 }
4025 else
4026 #endif
4027 c = *cc++;
4028 SET_CHAR_OFFSET(c);
4029 #ifdef SUPPORT_UTF
4030 if (common->utf)
4031 {
4032 GETCHARINC(c, cc);
4033 }
4034 else
4035 #endif
4036 c = *cc++;
4037 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4038 {
4039 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4040 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4041 numberofcmps++;
4042 }
4043 else if (numberofcmps > 0)
4044 {
4045 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4046 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4047 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4048 numberofcmps = 0;
4049 }
4050 else
4051 {
4052 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4053 numberofcmps = 0;
4054 }
4055 }
4056 #ifdef SUPPORT_UCP
4057 else
4058 {
4059 if (*cc == XCL_NOTPROP)
4060 invertcmp ^= 0x1;
4061 cc++;
4062 switch(*cc)
4063 {
4064 case PT_ANY:
4065 if (list != backtracks)
4066 {
4067 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4068 continue;
4069 }
4070 else if (cc[-1] == XCL_NOTPROP)
4071 continue;
4072 jump = JUMP(SLJIT_JUMP);
4073 break;
4074
4075 case PT_LAMP:
4076 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4077 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4078 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4079 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4080 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4081 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4082 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4083 break;
4084
4085 case PT_GC:
4086 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4087 SET_TYPE_OFFSET(c);
4088 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4089 break;
4090
4091 case PT_PC:
4092 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4093 break;
4094
4095 case PT_SC:
4096 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4097 break;
4098
4099 case PT_SPACE:
4100 case PT_PXSPACE:
4101 if (*cc == PT_SPACE)
4102 {
4103 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4104 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4105 }
4106 SET_CHAR_OFFSET(9);
4107 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4108 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4109 if (*cc == PT_SPACE)
4110 JUMPHERE(jump);
4111
4112 SET_TYPE_OFFSET(ucp_Zl);
4113 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4114 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4115 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4116 break;
4117
4118 case PT_WORD:
4119 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4120 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4121 /* Fall through. */
4122
4123 case PT_ALNUM:
4124 SET_TYPE_OFFSET(ucp_Ll);
4125 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4126 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4127 SET_TYPE_OFFSET(ucp_Nd);
4128 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4129 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4130 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4131 break;
4132
4133 case PT_CLIST:
4134 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4135
4136 /* At least three characters are required.
4137 Otherwise this case would be handled by the normal code path. */
4138 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4139 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4140
4141 /* Optimizing character pairs, if their difference is power of 2. */
4142 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4143 {
4144 if (charoffset == 0)
4145 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4146 else
4147 {
4148 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4149 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4150 }
4151 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4152 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4153 other_cases += 2;
4154 }
4155 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4156 {
4157 if (charoffset == 0)
4158 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4159 else
4160 {
4161 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4162 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4163 }
4164 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4165 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4166
4167 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4168 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4169
4170 other_cases += 3;
4171 }
4172 else
4173 {
4174 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4175 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4176 }
4177
4178 while (*other_cases != NOTACHAR)
4179 {
4180 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4181 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4182 }
4183 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4184 break;
4185
4186 case PT_UCNC:
4187 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4188 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4189 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4190 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4191 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4192 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4193
4194 SET_CHAR_OFFSET(0xa0);
4195 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4196 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4197 SET_CHAR_OFFSET(0);
4198 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4199 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4200 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4201 break;
4202 }
4203 cc += 2;
4204 }
4205 #endif
4206
4207 if (jump != NULL)
4208 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4209 }
4210
4211 if (found != NULL)
4212 set_jumps(found, LABEL());
4213 }
4214
4215 #undef SET_TYPE_OFFSET
4216 #undef SET_CHAR_OFFSET
4217
4218 #endif
4219
/* Emits the matching-path code for one opcode that tests a position or at
   most one character: anchors, character types, newline sequences, literal
   characters, bitmap classes, extended classes and OP_REVERSE.

   common      compiler state (DEFINE_COMPILER derives the sljit compiler from it)
   type        the opcode being compiled
   cc          pattern pointer to the opcode's operands; the caller visible in
               this part of the file passes the address just after the opcode
   backtracks  list that receives every jump taken when the item fails

   Returns the pattern pointer just past the operands used by the opcode.
   An opcode that the switch does not handle reaches SLJIT_ASSERT_STOP(). */
4220 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4221 {
4222 DEFINE_COMPILER;
4223 int length;
4224 unsigned int c, oc, bit;
4225 compare_context context;
4226 struct sljit_jump *jump[4];
4227 jump_list *end_list;
4228 #ifdef SUPPORT_UTF
4229 struct sljit_label *label;
4230 #ifdef SUPPORT_UCP
4231 pcre_uchar propdata[5];
4232 #endif
4233 #endif
4234
4235 switch(type)
4236 {
/* Fails unless STR_PTR equals jit_arguments.begin. */
4237 case OP_SOD:
4238 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4239 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4240 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4241 return cc;
4242
/* Fails unless STR_PTR equals jit_arguments.str. */
4243 case OP_SOM:
4244 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4245 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4246 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4247 return cc;
4248
/* Fast-calls the shared wordboundary routine, then backtracks on NOT_ZERO for
   OP_NOT_WORD_BOUNDARY and on ZERO for OP_WORD_BOUNDARY. */
4249 case OP_NOT_WORD_BOUNDARY:
4250 case OP_WORD_BOUNDARY:
4251 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4252 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4253 return cc;
4254
4255 case OP_NOT_DIGIT:
4256 case OP_DIGIT:
4257 /* Digits are usually 0-9, so it is worth to optimize them. */
4258 if (common->digits[0] == -2)
4259 get_ctype_ranges(common, ctype_digit, common->digits);
4260 detect_partial_match(common, backtracks);
4261 /* Flip the starting bit in the negative case. */
4262 if (type == OP_NOT_DIGIT)
4263 common->digits[1] ^= 1;
/* Fall back to the ctype_digit bit test when check_ranges() returns FALSE. */
4264 if (!check_ranges(common, common->digits, backtracks, TRUE))
4265 {
4266 read_char8_type(common);
4267 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4268 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4269 }
/* Undo the flip above so that common->digits is left as it was found. */
4270 if (type == OP_NOT_DIGIT)
4271 common->digits[1] ^= 1;
4272 return cc;
4273
/* Tests the ctype_space bit of TMP1 after read_char8_type(). */
4274 case OP_NOT_WHITESPACE:
4275 case OP_WHITESPACE:
4276 detect_partial_match(common, backtracks);
4277 read_char8_type(common);
4278 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4279 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4280 return cc;
4281
/* Tests the ctype_word bit of TMP1 after read_char8_type(). */
4282 case OP_NOT_WORDCHAR:
4283 case OP_WORDCHAR:
4284 detect_partial_match(common, backtracks);
4285 read_char8_type(common);
4286 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4287 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4288 return cc;
4289
/* With a fixed newline above 255 (two code units) the character just read
   only fails when it equals the first unit and the following unit equals the
   second one. All other newline settings are delegated to check_newlinechar(). */
4290 case OP_ANY:
4291 detect_partial_match(common, backtracks);
4292 read_char(common);
4293 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4294 {
4295 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4296 end_list = NULL;
4297 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4298 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4299 else
4300 check_str_end(common, &end_list);
4301
4302 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4303 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4304 set_jumps(end_list, LABEL());
4305 JUMPHERE(jump[0]);
4306 }
4307 else
4308 check_newlinechar(common, common->nltype, backtracks, TRUE);
4309 return cc;
4310
/* Advances past one character without testing it. In UTF mode the 8-bit build
   adds the utf8_table4 entry for lead units >= 0xc0, and the 16-bit build adds
   two more bytes when (unit & 0xfc00) == 0xd800. */
4311 case OP_ALLANY:
4312 detect_partial_match(common, backtracks);
4313 #ifdef SUPPORT_UTF
4314 if (common->utf)
4315 {
4316 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4317 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4318 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4319 #if defined COMPILE_PCRE8
4320 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4321 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4322 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4323 #elif defined COMPILE_PCRE16
4324 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4325 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4326 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4327 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4328 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4329 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4330 #endif
4331 JUMPHERE(jump[0]);
4332 #endif /* COMPILE_PCRE[8|16] */
4333 return cc;
4334 }
4335 #endif
4336 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4337 return cc;
4338
/* Advances STR_PTR by IN_UCHARS(1) regardless of UTF mode. */
4339 case OP_ANYBYTE:
4340 detect_partial_match(common, backtracks);
4341 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4342 return cc;
4343
4344 #ifdef SUPPORT_UTF
4345 #ifdef SUPPORT_UCP
/* Wraps the two property operands in a five-element synthetic XCLASS body
   (leading 0, XCL_PROP or XCL_NOTPROP, the operands, XCL_END) and reuses
   compile_xclass_matchingpath(). */
4346 case OP_NOTPROP:
4347 case OP_PROP:
4348 propdata[0] = 0;
4349 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4350 propdata[2] = cc[0];
4351 propdata[3] = cc[1];
4352 propdata[4] = XCL_END;
4353 compile_xclass_matchingpath(common, propdata, backtracks);
4354 return cc + 2;
4355 #endif
4356 #endif
4357
/* After a CR: a following NL is consumed as well; a CR at the end of the
   subject or followed by something else is accepted on its own. Any other
   character is checked by check_newlinechar() using common->bsr_nltype. */
4358 case OP_ANYNL:
4359 detect_partial_match(common, backtracks);
4360 read_char(common);
4361 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4362 /* We don't need to handle soft partial matching case. */
4363 end_list = NULL;
4364 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4365 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4366 else
4367 check_str_end(common, &end_list);
4368 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4369 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4370 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4371 jump[2] = JUMP(SLJIT_JUMP);
4372 JUMPHERE(jump[0]);
4373 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4374 set_jumps(end_list, LABEL());
4375 JUMPHERE(jump[1]);
4376 JUMPHERE(jump[2]);
4377 return cc;
4378
/* Reads a character, fast-calls the shared hspace routine and backtracks on
   NOT_ZERO for OP_NOT_HSPACE, on ZERO for OP_HSPACE. */
4379 case OP_NOT_HSPACE:
4380 case OP_HSPACE:
4381 detect_partial_match(common, backtracks);
4382 read_char(common);
4383 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4384 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4385 return cc;
4386
/* Same scheme as OP_HSPACE, using the shared vspace routine. */
4387 case OP_NOT_VSPACE:
4388 case OP_VSPACE:
4389 detect_partial_match(common, backtracks);
4390 read_char(common);
4391 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4392 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4393 return cc;
4394
4395 #ifdef SUPPORT_UCP
/* Reads the first character and keeps its gbprop value in STACK_TOP (whose
   own value is parked in LOCALS0). The loop then reads one character at a
   time and continues while bit (1 << gbprop of the new character) is set in
   ucp_gbtable[gbprop of the previous character]; when the bit is clear,
   STR_PTR is restored from TMP3 to the position before that character. */
4396 case OP_EXTUNI:
4397 detect_partial_match(common, backtracks);
4398 read_char(common);
4399 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4400 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4401 /* Optimize register allocation: use a real register. */
4402 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4403 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4404
4405 label = LABEL();
4406 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4407 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4408 read_char(common);
4409 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4410 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4411 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4412
4413 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4414 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4415 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4416 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4417 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4418 JUMPTO(SLJIT_C_NOT_ZERO, label);
4419
4420 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4421 JUMPHERE(jump[0]);
4422 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4423
4424 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4425 {
4426 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4427 /* Since we successfully read a char above, partial matching must occur. */
4428 check_partial(common, TRUE);
4429 JUMPHERE(jump[0]);
4430 }
4431 return cc;
4432 #endif
4433
/* Succeeds when STR_PTR is at STR_END, or when a newline starts at STR_PTR
   and ends exactly at STR_END. */
4434 case OP_EODN:
4435 /* Requires rather complex checks. */
4436 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4437 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4438 {
4439 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4440 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4441 if (common->mode == JIT_COMPILE)
4442 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4443 else
4444 {
4445 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4446 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4447 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4448 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4449 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4450 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4451 check_partial(common, TRUE);
4452 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4453 JUMPHERE(jump[1]);
4454 }
4455 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4456 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4457 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4458 }
4459 else if (common->nltype == NLTYPE_FIXED)
4460 {
4461 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4462 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4463 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4464 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4465 }
4466 else
4467 {
/* CR first: it may be the last unit (Greater), or be followed by exactly
   one more unit which must then be NL (Equal); anything longer fails. */
4468 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4469 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4470 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4471 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4472 jump[2] = JUMP(SLJIT_C_GREATER);
4473 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4474 /* Equal. */
4475 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4476 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4477 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4478
4479 JUMPHERE(jump[1]);
4480 if (common->nltype == NLTYPE_ANYCRLF)
4481 {
4482 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4483 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4484 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4485 }
4486 else
4487 {
/* STR_PTR is saved in LOCALS1 around read_char() and restored afterwards,
   so the position is unchanged when this path succeeds. */
4488 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4489 read_char(common);
4490 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4491 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4492 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4493 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4494 }
4495 JUMPHERE(jump[2]);
4496 JUMPHERE(jump[3]);
4497 }
4498 JUMPHERE(jump[0]);
4499 check_partial(common, FALSE);
4500 return cc;
4501
/* Fails while STR_PTR is still below STR_END. */
4502 case OP_EOD:
4503 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4504 check_partial(common, FALSE);
4505 return cc;
4506
/* Fails when STR_PTR is beyond jit_arguments.begin or the notbol flag is set. */
4507 case OP_CIRC:
4508 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4509 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4510 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4511 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4512 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4513 return cc;
4514
/* At jit_arguments.begin only the notbol flag is checked. Beyond it, STR_PTR
   must be below STR_END and what precedes STR_PTR must be a newline. */
4515 case OP_CIRCM:
4516 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4517 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4518 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4519 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4520 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4521 jump[0] = JUMP(SLJIT_JUMP);
4522 JUMPHERE(jump[1]);
4523
4524 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4525 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4526 {
4527 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4528 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4529 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4530 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4531 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4532 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4533 }
4534 else
4535 {
4536 skip_char_back(common);
4537 read_char(common);
4538 check_newlinechar(common, common->nltype, backtracks, FALSE);
4539 }
4540 JUMPHERE(jump[0]);
4541 return cc;
4542
/* Fails when the noteol flag is set. Otherwise the OP_EODN code is emitted
   (by a recursive call), or only the end-of-subject test when
   common->endonly is set. */
4543 case OP_DOLL:
4544 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4545 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4546 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4547
4548 if (!common->endonly)
4549 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4550 else
4551 {
4552 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4553 check_partial(common, FALSE);
4554 }
4555 return cc;
4556
/* At STR_END: succeeds unless the noteol flag is set. Below STR_END: what
   follows STR_PTR must be a newline. */
4557 case OP_DOLLM:
4558 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4559 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4560 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4561 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4562 check_partial(common, FALSE);
4563 jump[0] = JUMP(SLJIT_JUMP);
4564 JUMPHERE(jump[1]);
4565
4566 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4567 {
4568 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4569 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4570 if (common->mode == JIT_COMPILE)
4571 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4572 else
4573 {
4574 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4575 /* STR_PTR = STR_END - IN_UCHARS(1) */
4576 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4577 check_partial(common, TRUE);
4578 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4579 JUMPHERE(jump[1]);
4580 }
4581
4582 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4583 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4584 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4585 }
4586 else
4587 {
4588 peek_char(common);
4589 check_newlinechar(common, common->nltype, backtracks, FALSE);
4590 }
4591 JUMPHERE(jump[0]);
4592 return cc;
4593
/* Literal character; `length` is its size in code units. */
4594 case OP_CHAR:
4595 case OP_CHARI:
4596 length = 1;
4597 #ifdef SUPPORT_UTF
4598 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4599 #endif
/* Non-partial mode, and either caseful, no other case, or a non-zero
   char_get_othercase_bit(): advance STR_PTR by the whole length, bounds-check
   once, and let byte_sequence_compare() emit the comparison. */
4600 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4601 {
4602 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4603 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4604
4605 context.length = IN_UCHARS(length);
4606 context.sourcereg = -1;
4607 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4608 context.ucharptr = 0;
4609 #endif
4610 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4611 }
/* Otherwise read the subject character and compare it with c, or with both
   c and its other case. */
4612 detect_partial_match(common, backtracks);
4613 read_char(common);
4614 #ifdef SUPPORT_UTF
4615 if (common->utf)
4616 {
4617 GETCHAR(c, cc);
4618 }
4619 else
4620 #endif
4621 c = *cc;
4622 if (type == OP_CHAR || !char_has_othercase(common, cc))
4623 {
4624 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4625 return cc + length;
4626 }
4627 oc = char_othercase(common, c);
4628 bit = c ^ oc;
/* When the two cases differ in one bit, force that bit on and compare once. */
4629 if (is_powerof2(bit))
4630 {
4631 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4632 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4633 return cc + length;
4634 }
4635 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4636 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4637 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4638 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4639 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4640 return cc + length;
4641
/* Negated literal: fails when the subject character equals c (or, for
   OP_NOTI, its other case). */
4642 case OP_NOT:
4643 case OP_NOTI:
4644 detect_partial_match(common, backtracks);
4645 length = 1;
4646 #ifdef SUPPORT_UTF
4647 if (common->utf)
4648 {
4649 #ifdef COMPILE_PCRE8
4650 c = *cc;
4651 if (c < 128)
4652 {
4653 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4654 if (type == OP_NOT || !char_has_othercase(common, cc))
4655 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4656 else
4657 {
4658 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4659 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4660 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4661 }
4662 /* Skip the variable-length character. */
4663 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4664 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4665 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4666 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4667 JUMPHERE(jump[0]);
4668 return cc + 1;
4669 }
4670 else
4671 #endif /* COMPILE_PCRE8 */
4672 {
4673 GETCHARLEN(c, cc, length);
4674 read_char(common);
4675 }
4676 }
4677 else
4678 #endif /* SUPPORT_UTF */
4679 {
4680 read_char(common);
4681 c = *cc;
4682 }
4683
4684 if (type == OP_NOT || !char_has_othercase(common, cc))
4685 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4686 else
4687 {
4688 oc = char_othercase(common, c);
4689 bit = c ^ oc;
4690 if (is_powerof2(bit))
4691 {
4692 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4693 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4694 }
4695 else
4696 {
4697 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4698 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4699 }
4700 }
4701 return cc + length;
4702
/* cc points to a 32-byte bitmap: the generic path below tests bit (c & 7) of
   byte (c >> 3) and fails when it is clear. It is only emitted when
   check_class_ranges() returns FALSE. */
4703 case OP_CLASS:
4704 case OP_NCLASS:
4705 detect_partial_match(common, backtracks);
4706 read_char(common);
4707 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4708 return cc + 32 / sizeof(pcre_uchar);
4709
4710 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4711 jump[0] = NULL;
4712 #ifdef COMPILE_PCRE8
4713 /* This check only affects 8 bit mode. In other modes, we
4714 always need to compare the value with 255. */
4715 if (common->utf)
4716 #endif /* COMPILE_PCRE8 */
4717 {
/* Characters above 255 are outside the bitmap: they fail OP_CLASS and, for
   OP_NCLASS, skip the bitmap test (jump[0] is resolved after it). */
4718 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4719 if (type == OP_CLASS)
4720 {
4721 add_jump(compiler, backtracks, jump[0]);
4722 jump[0] = NULL;
4723 }
4724 }
4725 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4726 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4727 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4728 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4729 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4730 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4731 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4732 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4733 if (jump[0] != NULL)
4734 JUMPHERE(jump[0]);
4735 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4736 return cc + 32 / sizeof(pcre_uchar);
4737
4738 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* The class data starts LINK_SIZE units after cc; GET(cc, 0) is used to
   compute the position of the next item. */
4739 case OP_XCLASS:
4740 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4741 return cc + GET(cc, 0) - 1;
4742 #endif
4743
/* Moves STR_PTR backwards by GET(cc, 0): in UTF mode one skip_char_back() per
   count, failing once STR_PTR is not above jit_arguments.begin; otherwise a
   single subtraction of IN_UCHARS(length), failing if the result is below
   jit_arguments.begin. A zero length emits nothing. */
4744 case OP_REVERSE:
4745 length = GET(cc, 0);
4746 if (length == 0)
4747 return cc + LINK_SIZE;
4748 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4749 #ifdef SUPPORT_UTF
4750 if (common->utf)
4751 {
4752 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4753 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4754 label = LABEL();
4755 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4756 skip_char_back(common);
4757 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4758 JUMPTO(SLJIT_C_NOT_ZERO, label);
4759 }
4760 else
4761 #endif
4762 {
4763 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4764 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4765 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4766 }
4767 check_start_used_ptr(common);
4768 return cc + LINK_SIZE;
4769 }
/* Reached only for an opcode that has no case above. */
4770 SLJIT_ASSERT_STOP();
4771 return cc;
4772 }
4773
4774 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4775 {
4776 /* This function consumes at least one input character. */
4777 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4778 DEFINE_COMPILER;
4779 pcre_uchar *ccbegin = cc;
4780 compare_context context;
4781 int size;
4782
4783 context.length = 0;
4784 do
4785 {
4786 if (cc >= ccend)
4787 break;
4788
4789 if (*cc == OP_CHAR)
4790 {
4791 size = 1;
4792 #ifdef SUPPORT_UTF
4793 if (common->utf && HAS_EXTRALEN(cc[1]))
4794 size += GET_EXTRALEN(cc[1]);
4795 #endif
4796 }
4797 else if (*cc == OP_CHARI)
4798 {
4799 size = 1;
4800 #ifdef SUPPORT_UTF
4801 if (common->utf)
4802 {
4803 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4804 size = 0;
4805 else if (HAS_EXTRALEN(cc[1]))
4806 size += GET_EXTRALEN(cc[1]);
4807 }
4808 else
4809 #endif
4810 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4811 size = 0;
4812 }
4813 else
4814 size = 0;
4815
4816 cc += 1 + size;
4817 context.length += IN_UCHARS(size);
4818 }
4819 while (size > 0 && context.length <= 128);
4820
4821 cc = ccbegin;
4822 if (context.length > 0)
4823 {
4824 /* We have a fixed-length byte sequence. */
4825 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4826 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4827
4828 context.sourcereg = -1;
4829 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4830 context.ucharptr = 0;
4831 #endif
4832 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4833 return cc;
4834 }
4835
4836 /* A non-fixed length character will be checked if length == 0. */
4837 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
4838 }
4839
4840 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4841 {
4842 DEFINE_COMPILER;
4843 int offset = GET2(cc, 1) << 1;
4844
4845 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4846 if (!common->jscript_compat)
4847 {
4848 if (backtracks == NULL)
4849 {
4850 /* OVECTOR(1) contains the "string begin - 1" constant. */
4851 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4852 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4853 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4854 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4855 return JUMP(SLJIT_C_NOT_ZERO);
4856 }
4857 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4858 }
4859 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4860 }
4861
4862 /* Forward declarations of the two path generators (prototypes only; being
static, both are defined elsewhere in this file). */
4863 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
4864 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
4865
/* PUSH_BACKTRACK(size, ccstart, error)

   Obtains `size` bytes with sljit_alloc_memory(), zero-fills them, and pushes
   the record on the parent's backtrack list: its `prev` becomes the old
   parent->top, its `cc` becomes `ccstart`, and parent->top is set to it.

   The macro is not self-contained. The call site must have `compiler`,
   `parent` and `backtrack` in scope; the new record is left in `backtrack`.
   It also contains a hidden `return`: if the compiler reports an error after
   the allocation, the ENCLOSING function returns `error`.

   `size` is expanded twice, so it must not have side effects. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return (error); \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Same as PUSH_BACKTRACK, for use inside functions returning void: on a
   compiler error the enclosing function simply returns. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Views the call site's `backtrack` variable as a pointer to `type`. */
#define BACKTRACK_AS(type) ((type *)backtrack)
4893
/* Emits the machine code that matches one occurrence of a back reference
(OP_REF, or the caseless OP_REFI). The referenced group number is the 2-byte
operand at cc + 1.

  common      compiler state shared by the code generators
  cc          points at the OP_REF / OP_REFI opcode
  backtracks  jump list that receives every "no match" jump emitted here
  withchecks  when TRUE, also emit the test against OVECTOR(1) (skipped in
              jscript compatible mode) and the empty-capture test
  emptyfail   decides the fate of the empty-capture jump created under
              withchecks: TRUE adds it to backtracks, FALSE makes it land
              right after the comparison code

Returns the address of the opcode that follows the reference. */
4894 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
4895 {
4896 DEFINE_COMPILER;
/* Each group owns two consecutive ovector slots, hence the shift. */
4897 int offset = GET2(cc, 1) << 1;
4898 struct sljit_jump *jump = NULL;
4899 struct sljit_jump *partial;
4900 struct sljit_jump *nopartial;
4901
/* TMP1 = first ovector slot of the referenced group. */
4902 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4903 /* OVECTOR(1) contains the "string begin - 1" constant. */
4904 if (withchecks && !common->jscript_compat)
4905 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4906
4907 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4908 if (common->utf && *cc == OP_REFI)
4909 {
/* Caseless comparison in UTF mode is delegated to the C helper
do_utf_caselesscmp, called with TMP1, the register holding ARGUMENTS and TMP2
as its three arguments (the assert pins the register assignment this relies on). */
4910 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
4911 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
/* Both slots equal: the capture is empty, skip the comparison. */
4912 if (withchecks)
4913 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
4914
4915 /* Needed to save important temporary registers. */
4916 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4917 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
/* The current subject position is handed over through jit_arguments.uchar_ptr. */
4918 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
4919 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
4920 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* The tests below treat the helper's result as: 0 = mismatch, 1 = take the
partial-match path, any other value = the new STR_PTR.
NOTE(review): do_utf_caselesscmp is defined outside this block - confirm. */
4921 if (common->mode == JIT_COMPILE)
4922 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
4923 else
4924 {
4925 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
4926 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
4927 check_partial(common, FALSE);
4928 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4929 JUMPHERE(nopartial);
4930 }
4931 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
4932 }
4933 else
4934 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4935 {
/* TMP2 = second slot - first slot of the group, i.e. the distance between the
capture's end and start; the SLJIT_C_ZERO jump below tests this result. */
4936 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
4937 if (withchecks)
4938 jump = JUMP(SLJIT_C_ZERO);
4939
/* Advance STR_PTR by that distance first; "partial" is taken when this runs
past STR_END. */
4940 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4941 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
4942 if (common->mode == JIT_COMPILE)
4943 add_jump(compiler, backtracks, partial);
4944
/* Call the shared compare routine; any non-zero TMP2 afterwards is treated as
a mismatch. */
4945 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4946 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4947
4948 if (common->mode != JIT_COMPILE)
4949 {
/* Partial matching modes: when the reference would run past STR_END, compare
only the part that fits (nothing if it is empty), then let check_partial
handle the result and backtrack. */
4950 nopartial = JUMP(SLJIT_JUMP);
4951 JUMPHERE(partial);
4952 /* TMP2 -= STR_END - STR_PTR */
4953 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
4954 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
4955 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4956 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
4957 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4958 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4959 JUMPHERE(partial);
4960 check_partial(common, FALSE);
4961 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4962 JUMPHERE(nopartial);
4963 }
4964 }
4965
/* Resolve the empty-capture jump (only created when withchecks is TRUE). */
4966 if (jump != NULL)
4967 {
4968 if (emptyfail)
4969 add_jump(compiler, backtracks, jump);
4970 else
4971 JUMPHERE(jump);
4972 }
4973 return cc + 1 + IMM2_SIZE;
4974 }
4975
/* Emits the matching path of a back reference that is followed by a repeat
opcode (OP_CRSTAR ... OP_CRMINRANGE), i.e. a repeated back reference.

  common  compiler state shared by the code generators
  cc      points at the reference opcode; the repeat opcode follows its
          2-byte operand
  parent  backtrack node that receives the new iterator_backtrack

The repeat is reduced to a (min, max) pair, where max == 0 stands for "no
upper limit". Two different code shapes are produced: a greedy one (the
"!minimize" branch, which returns early) and a minimizing one.

Returns the address of the opcode following the repeat operands. */
4976 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4977 {
4978 DEFINE_COMPILER;
4979 backtrack_common *backtrack;
4980 pcre_uchar type;
4981 struct sljit_label *label;
4982 struct sljit_jump *zerolength;
4983 struct sljit_jump *jump = NULL;
4984 pcre_uchar *ccbegin = cc;
4985 int min = 0, max = 0;
4986 BOOL minimize;
4987
4988 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
4989
/* The low bit of the repeat opcode selects the minimizing variant.
NOTE(review): this relies on the OP_CRMIN* opcodes being the odd-numbered
ones - confirm against the opcode table. */
4990 type = cc[1 + IMM2_SIZE];
4991 minimize = (type & 0x1) != 0;
4992 switch(type)
4993 {
4994 case OP_CRSTAR:
4995 case OP_CRMINSTAR:
4996 min = 0;
4997 max = 0;
4998 cc += 1 + IMM2_SIZE + 1;
4999 break;
5000 case OP_CRPLUS:
5001 case OP_CRMINPLUS:
5002 min = 1;
5003 max = 0;
5004 cc += 1 + IMM2_SIZE + 1;
5005 break;
5006 case OP_CRQUERY:
5007 case OP_CRMINQUERY:
5008 min = 0;
5009 max = 1;
5010 cc += 1 + IMM2_SIZE + 1;
5011 break;
5012 case OP_CRRANGE:
5013 case OP_CRMINRANGE:
/* The range form carries two extra 2-byte operands: min, then max. */
5014 min = GET2(cc, 1 + IMM2_SIZE + 1);
5015 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5016 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5017 break;
5018 default:
5019 SLJIT_ASSERT_STOP();
5020 break;
5021 }
5022
/* ---- Greedy repeat ---- */
5023 if (!minimize)
5024 {
5025 if (min == 0)
5026 {
/* Zero iterations are acceptable: save the starting STR_PTR plus a 0 word.
compile_ref_checks (defined elsewhere) gets no backtrack list here and its
returned jump is resolved at the end of this branch. */
5027 allocate_stack(common, 2);
5028 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5029 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5030 /* Temporary release of STR_PTR. */
5031 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5032 zerolength = compile_ref_checks(common, ccbegin, NULL);
5033 /* Restore if not zero length. */
5034 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5035 }
5036 else
5037 {
5038 allocate_stack(common, 1);
5039 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5040 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5041 }
5042
/* A counted repeat keeps its iteration counter in the POSSESSIVE0 local. */
5043 if (min > 1 || max > 1)
5044 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5045
/* Loop head: one iteration of the reference, emitted without the extra checks. */
5046 label = LABEL();
5047 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5048
5049 if (min > 1 || max > 1)
5050 {
5051 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5052 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5053 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below min: loop again without saving a position. */
5054 if (min > 1)
5055 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5056 if (max > 1)
5057 {
/* Below max: push the current STR_PTR and try another iteration. */
5058 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5059 allocate_stack(common, 1);
5060 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5061 JUMPTO(SLJIT_JUMP, label);
5062 JUMPHERE(jump);
5063 }
5064 }
5065
5066 if (max == 0)
5067 {
5068 /* Includes min > 1 case as well. */
5069 allocate_stack(common, 1);
5070 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5071 JUMPTO(SLJIT_JUMP, label);
5072 }
5073
5074 JUMPHERE(zerolength);
5075 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5076
5077 decrease_call_count(common);
5078 return cc;
5079 }
5080
/* ---- Minimizing repeat ----
Two stack words are reserved: STACK(0) is written with STR_PTR below and
STACK(1) is used as the iteration counter (left unset for OP_CRMINSTAR, which
has neither a minimum nor a maximum to count towards). */
5081 allocate_stack(common, 2);
5082 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5083 if (type != OP_CRMINSTAR)
5084 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5085
5086 if (min == 0)
5087 {
/* Zero iterations allowed: record the position and skip the first iteration. */
5088 zerolength = compile_ref_checks(common, ccbegin, NULL);
5089 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5090 jump = JUMP(SLJIT_JUMP);
5091 }
5092 else
5093 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5094
/* The label stored in matchingpath sits in front of the max test and the
reference match; the CMPTO below jumps back to it while the count is below min. */
5095 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5096 if (max > 0)
5097 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5098
5099 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5100 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5101
5102 if (min > 1)
5103 {
/* Count the iteration and keep looping until min is reached. */
5104 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5105 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5106 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5107 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5108 }
5109 else if (max > 0)
5110 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5111
5112 if (jump != NULL)
5113 JUMPHERE(jump);
5114 JUMPHERE(zerolength);
5115
5116 decrease_call_count(common);
5117 return cc;
5118 }
5119
5120 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5121 {
5122 DEFINE_COMPILER;
5123 backtrack_common *backtrack;
5124 recurse_entry *entry = common->entries;
5125 recurse_entry *prev = NULL;
5126 int start = GET(cc, 1);
5127 pcre_uchar *start_cc;
5128
5129 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5130
5131 /* Inlining simple patterns. */
5132 if (get_framesize(common, common->start + start, TRUE) == no_stack)
5133 {
5134 start_cc = common->start + start;
5135 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5136 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5137 return cc + 1 + LINK_SIZE;
5138 }
5139
5140 while (entry != NULL)
5141 {
5142 if (entry->start == start)
5143 break;
5144 prev = entry;
5145 entry = entry->next;
5146 }
5147
5148 if (entry == NULL)
5149 {
5150 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5151 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5152 return NULL;
5153 entry->next = NULL;
5154 entry->entry = NULL;
5155 entry->calls = NULL;
5156 entry->start = start;
5157
5158 if (prev != NULL)
5159 prev->next = entry;
5160 else
5161 common->entries = entry;
5162 }
5163
5164 if (common->has_set_som && common->mark_ptr != 0)
5165 {
5166 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5167 allocate_stack(common, 2);
5168 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5169 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5170 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5171 }
5172 else if (common->has_set_som || common->mark_ptr != 0)
5173 {
5174 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5175 allocate_stack(common, 1);
5176 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5177 }
5178
5179 if (entry->entry == NULL)
5180 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5181 else
5182 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5183 /* Leave if the match is failed. */
5184 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5185 return cc + 1 + LINK_SIZE;
5186 }
5187
5188 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5189 {
5190 const pcre_uchar *begin = arguments->begin;
5191 int *offset_vector = arguments->offsets;
5192 int offset_count = arguments->offset_count;
5193 int i;
5194
5195 if (PUBL(callout) == NULL)
5196 return 0;
5197
5198 callout_block->version = 2;
5199 callout_block->callout_data = arguments->callout_data;
5200
5201 /* Offsets in subject. */
5202 callout_block->subject_length = arguments->end - arguments->begin;
5203 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5204 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5205 #if defined COMPILE_PCRE8
5206 callout_block->subject = (PCRE_SPTR)begin;
5207 #elif defined COMPILE_PCRE16
5208 callout_block->subject = (PCRE_SPTR16)begin;
5209 #elif defined COMPILE_PCRE32
5210 callout_block->subject = (PCRE_SPTR32)begin;
5211 #endif
5212
5213 /* Convert and copy the JIT offset vector to the offset_vector array. */
5214 callout_block->capture_top = 0;
5215 callout_block->offset_vector = offset_vector;
5216 for (i = 2; i < offset_count; i += 2)
5217 {
5218 offset_vector[i] = jit_ovector[i] - begin;
5219 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5220 if (jit_ovector[i] >= begin)
5221 callout_block->capture_top = i;
5222 }
5223
5224 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5225 if (offset_count > 0)
5226 offset_vector[0] = -1;
5227 if (offset_count > 1)
5228 offset_vector[1] = -1;
5229 return (*PUBL(callout))(callout_block);
5230 }
5231
/* Size of the callout block reserved on the JIT stack, rounded up to a
multiple of 8 bytes. */
5232 /* Aligning to 8 byte. */
5233 #define CALLOUT_ARG_SIZE \
5234 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5235
/* Offset of a callout block field relative to STACK_TOP. The block base is
STACK_TOP - CALLOUT_ARG_SIZE (see the SLJIT_SUB before the call below), so
every field offset is negative. */
5236 #define CALLOUT_ARG_OFFSET(arg) \
5237 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5238
/* Emits the code of a callout item: a callout block is built on the JIT
stack, the C helper do_callout is called with it, and the result decides how
matching goes on.

  common  compiler state shared by the code generators
  cc      points at the callout opcode; cc[1] is the callout number, followed
          by two LINK_SIZE operands (pattern position, next item length)
  parent  backtrack node that receives the new backtrack_common

Returns the address of the opcode following the callout item. */
5239 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5240 {
5241 DEFINE_COMPILER;
5242 backtrack_common *backtrack;
5243
5244 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5245
/* Reserve room for the callout block, counted in machine words. */
5246 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5247
5248 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
/* TMP1 keeps ARGUMENTS from here up to the call of do_callout. */
5249 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5250 SLJIT_ASSERT(common->capture_last_ptr != 0);
/* These two fields are written with SLJIT_MOV_SI rather than the word sized
SLJIT_MOV. */
5251 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5252 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5253
5254 /* These pointer sized fields temporarily store internal variables:
"offset_vector" carries STR_PTR and "subject" carries OVECTOR(0). do_callout
converts them into offsets before it overwrites the fields. */
5255 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5256 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5257 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5258
/* The mark field receives jit_arguments.mark_ptr when the pattern uses marks
(common->mark_ptr != 0), and 0 otherwise. */
5259 if (common->mark_ptr != 0)
5260 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5261 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5262 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5263 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5264
5265 /* Needed to save important temporary registers. */
5266 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
/* Set up the remaining two call arguments: the address of the callout block
and the address of the OVECTOR_START local. */
5267 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5268 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5269 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
/* NOTE(review): presumably this normalises the int result of do_callout to a
full machine word before it is tested - confirm. */
5270 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5271 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5272 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5273
5274 /* Check return value: positive -> backtrack, negative -> forced quit,
zero -> continue with the next item. */
5275 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5276 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
/* Jump straight to the forced quit label when it exists already, otherwise
queue the jump on the forced_quit list. */
5277 if (common->forced_quit_label == NULL)
5278 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5279 else
5280 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5281 return cc + 2 + 2 * LINK_SIZE;
5282 }
5283
5284 #undef CALLOUT_ARG_SIZE
5285 #undef CALLOUT_ARG_OFFSET
5286
/* Emits the code of an assertion group (OP_ASSERT, OP_ASSERT_NOT,
OP_ASSERTBACK or OP_ASSERTBACK_NOT), which may be preceded by OP_BRAZERO or
OP_BRAMINZERO.

  common       compiler state shared by the code generators
  cc           points at the assertion opcode, or at the preceding
               OP_BRAZERO / OP_BRAMINZERO
  backtrack    assert_backtrack node of this assertion; its framesize and
               private_data_ptr fields are filled in here
  conditional  TRUE when failure jumps must be collected on
               backtrack->condfailed instead of the regular topbacktracks
               list (a BRAZERO / BRAMINZERO prefix is asserted not to occur
               in that case)

The quit / accept labels and jump lists of "common" are replaced while the
alternatives are compiled and restored on every exit path.

Returns the address following the closing ket of the assertion, or NULL when
the underlying compiler reports an error. */
5287 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5288 {
5289 DEFINE_COMPILER;
5290 int framesize;
5291 int private_data_ptr;
5292 backtrack_common altbacktrack;
5293 pcre_uchar *ccbegin;
5294 pcre_uchar opcode;
5295 pcre_uchar bra = OP_BRA;
/* tmp collects the "an alternative matched" jumps of positive assertions. */
5296 jump_list *tmp = NULL;
/* target: where the jumps taken on assertion failure are collected. */
5297 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5298 jump_list **found;
5299 /* Saving previous accept variables. */
5300 struct sljit_label *save_quit_label = common->quit_label;
5301 struct sljit_label *save_accept_label = common->accept_label;
5302 jump_list *save_quit = common->quit;
5303 jump_list *save_accept = common->accept;
5304 struct sljit_jump *jump;
5305 struct sljit_jump *brajump = NULL;
5306
/* Remember and skip an optional BRAZERO / BRAMINZERO prefix. */
5307 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5308 {
5309 SLJIT_ASSERT(!conditional);
5310 bra = *cc;
5311 cc++;
5312 }
5313 private_data_ptr = PRIVATE_DATA(cc);
5314 SLJIT_ASSERT(private_data_ptr != 0);
5315 framesize = get_framesize(common, cc, FALSE);
5316 backtrack->framesize = framesize;
5317 backtrack->private_data_ptr = private_data_ptr;
5318 opcode = *cc;
5319 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* found: where a matching alternative jumps to. For a positive assertion this
is the local success list; for a negative one a matching alternative means
the assertion failed, so the jump goes to the failure target. */
5320 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
5321 ccbegin = cc;
/* cc now points at the end of the first alternative (OP_ALT or the ket). */
5322 cc += GET(cc, 1);
5323
5324 if (bra == OP_BRAMINZERO)
5325 {
5326 /* This is a braminzero backtrack path. */
5327 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5328 free_stack(common, 1);
5329 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5330 }
5331
/* Entry bookkeeping. Without a frame (framesize < 0) the current STACK_TOP is
stored in the private data slot and STR_PTR is pushed. With a frame, STR_PTR,
the previous private data value and the frame itself are pushed, and the
private data slot is set to a position derived from the new STACK_TOP. */
5332 if (framesize < 0)
5333 {
5334 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5335 allocate_stack(common, 1);
5336 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5337 }
5338 else
5339 {
5340 allocate_stack(common, framesize + 2);
5341 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5342 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
5343 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5344 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5345 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5346 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
5347 }
5348
/* The assertion gets its own quit list (shared by all alternatives) and a
fresh accept list per alternative. */
5349 memset(&altbacktrack, 0, sizeof(backtrack_common));
5350 common->quit_label = NULL;
5351 common->quit = NULL;
/* One iteration per alternative of the assertion. */
5352 while (1)
5353 {
5354 common->accept_label = NULL;
5355 common->accept = NULL;
5356 altbacktrack.top = NULL;
5357 altbacktrack.topbacktracks = NULL;
5358
/* Every alternative after the first restarts from the saved STR_PTR. */
5359 if (*ccbegin == OP_ALT)
5360 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5361
5362 altbacktrack.cc = ccbegin;
5363 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5364 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5365 {
5366 common->quit_label = save_quit_label;
5367 common->accept_label = save_accept_label;
5368 common->quit = save_quit;
5369 common->accept = save_accept;
5370 return NULL;
5371 }
/* The alternative matched: the accept jumps collected inside it land here. */
5372 common->accept_label = LABEL();
5373 if (common->accept != NULL)
5374 set_jumps(common->accept, common->accept_label);
5375
5376 /* Reset stack. */
5377 if (framesize < 0)
5378 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5379 else {
5380 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5381 {
5382 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5383 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5384 }
5385 else
5386 {
/* Unconditional negative assertion with a frame: unwind it through the shared
revertframes routine. */
5387 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5388 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5389 }
5390 }
5391
/* Negative assertion whose alternative matched: restore STR_PTR (and the
private data slot where needed) before jumping to the failure target. */
5392 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5393 {
5394 /* We know that STR_PTR was stored on the top of the stack. */
5395 if (conditional)
5396 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5397 else if (bra == OP_BRAZERO)
5398 {
5399 if (framesize < 0)
5400 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5401 else
5402 {
5403 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5404 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5405 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5406 }
5407 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5408 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5409 }
5410 else if (framesize >= 0)
5411 {
5412 /* For OP_BRA and OP_BRAMINZERO. */
5413 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5414 }
5415 }
5416 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5417
/* Code reached when this alternative fails to match. */
5418 compile_backtrackingpath(common, altbacktrack.top);
5419 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5420 {
5421 common->quit_label = save_quit_label;
5422 common->accept_label = save_accept_label;
5423 common->quit = save_quit;
5424 common->accept = save_accept;
5425 return NULL;
5426 }
5427 set_jumps(altbacktrack.topbacktracks, LABEL());
5428
5429 if (*cc != OP_ALT)
5430 break;
5431
/* Step to the next alternative. */
5432 ccbegin = cc;
5433 cc += GET(cc, 1);
5434 }
5435 /* None of them matched. */
5436 if (common->quit != NULL)
5437 set_jumps(common->quit, LABEL());
5438
/* ---- Positive assertions ---- */
5439 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5440 {
5441 /* Assert is failed. */
5442 if (conditional || bra == OP_BRAZERO)
5443 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5444
5445 if (framesize < 0)
5446 {
5447 /* The topmost item should be 0. */
5448 if (bra == OP_BRAZERO)
5449 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5450 else
5451 free_stack(common, 1);
5452 }
5453 else
5454 {
5455 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5456 /* The topmost item should be 0. */
5457 if (bra == OP_BRAZERO)
5458 {
5459 free_stack(common, framesize + 1);
5460 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5461 }
5462 else
5463 free_stack(common, framesize + 2);
5464 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5465 }
/* Failure exit. With a BRAZERO prefix the jump is not added to the failure
target; it is pointed at backtrack->matchingpath further down. */
5466 jump = JUMP(SLJIT_JUMP);
5467 if (bra != OP_BRAZERO)
5468 add_jump(compiler, target, jump);
5469
5470 /* Assert is successful. */
5471 set_jumps(tmp, LABEL());
5472 if (framesize < 0)
5473 {
5474 /* We know that STR_PTR was stored on the top of the stack. */
5475 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5476 /* Keep the STR_PTR on the top of the stack. */
5477 if (bra == OP_BRAZERO)
5478 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5479 else if (bra == OP_BRAMINZERO)
5480 {
5481 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5482 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5483 }
5484 }
5485 else
5486 {
5487 if (bra == OP_BRA)
5488 {
5489 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5490 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5491 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5492 }
5493 else
5494 {
5495 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5496 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5497 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* The top item becomes STR_PTR for BRAZERO and 0 for BRAMINZERO. */
5498 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5499 }
5500 }
5501
5502 if (bra == OP_BRAZERO)
5503 {
/* Success and failure continue at the same place. */
5504 backtrack->matchingpath = LABEL();
5505 SET_LABEL(jump, backtrack->matchingpath);
5506 }
5507 else if (bra == OP_BRAMINZERO)
5508 {
/* backtrack->matchingpath is not assigned in this function for BRAMINZERO;
it is read here as the jump destination. */
5509 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5510 JUMPHERE(brajump);
5511 if (framesize >= 0)
5512 {
5513 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5514 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5515 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5516 }
5517 set_jumps(backtrack->common.topbacktracks, LABEL());
5518 }
5519 }
/* ---- Negative assertions ---- */
5520 else
5521 {
5522 /* AssertNot is successful. */
5523 if (framesize < 0)
5524 {
5525 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5526 if (bra != OP_BRA)
5527 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5528 else
5529 free_stack(common, 1);
5530 }
5531 else
5532 {
5533 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5534 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5535 /* The topmost item should be 0. */
5536 if (bra != OP_BRA)
5537 {
5538 free_stack(common, framesize + 1);
5539 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5540 }
5541 else
5542 free_stack(common, framesize + 2);
5543 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5544 }
5545
5546 if (bra == OP_BRAZERO)
5547 backtrack->matchingpath = LABEL();
5548 else if (bra == OP_BRAMINZERO)
5549 {
5550 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5551 JUMPHERE(brajump);
5552 }
5553
/* With a BRAZERO / BRAMINZERO prefix the failure jumps are resolved right
here, and the list is emptied so they are not resolved a second time. */
5554 if (bra != OP_BRA)
5555 {
5556 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5557 set_jumps(backtrack->common.topbacktracks, LABEL());
5558 backtrack->common.topbacktracks = NULL;
5559 }
5560 }
5561
/* Restore the quit / accept state of the enclosing pattern. */
5562 common->quit_label = save_quit_label;
5563 common->accept_label = save_accept_label;
5564 common->quit = save_quit;
5565 common->accept = save_accept;
5566 return cc + 1 + LINK_SIZE;
5567 }
5568
5569 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
5570 {
5571 int condition = FALSE;
5572 pcre_uchar *slotA = name_table;
5573 pcre_uchar *slotB;
5574 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5575 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5576 sljit_sw no_capture;
5577 int i;
5578
5579 locals += refno & 0xff;
5580 refno >>= 8;
5581 no_capture = locals[1];
5582
5583 for (i = 0; i < name_count; i++)
5584 {
5585 if (GET2(slotA, 0) == refno) break;
5586 slotA += name_entry_size;
5587 }
5588
5589 if (i < name_count)
5590 {
5591 /* Found a name for the number - there can be only one; duplicate names
5592 for different numbers are allowed, but not vice versa. First scan down
5593 for duplicates. */
5594
5595 slotB = slotA;
5596 while (slotB > name_table)
5597 {
5598 slotB -= name_entry_size;
5599 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5600 {
5601 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5602 if (condition) break;
5603 }
5604 else break;
5605 }
5606
5607 /* Scan up for duplicates */
5608 if (!condition)
5609 {
5610 slotB = slotA;
5611 for (i++; i < name_count; i++)
5612 {
5613 slotB += name_entry_size;
5614 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5615 {
5616 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5617 if (condition) break;
5618 }
5619 else break;
5620 }
5621 }
5622 }
5623 return condition;
5624 }
5625
5626 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5627 {
5628 int condition = FALSE;
5629 pcre_uchar *slotA = name_table;
5630 pcre_uchar *slotB;
5631 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5632 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5633 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
5634 sljit_uw i;
5635
5636 for (i = 0; i < name_count; i++)
5637 {
5638 if (GET2(slotA, 0) == recno) break;
5639 slotA += name_entry_size;
5640 }
5641
5642 if (i < name_count)
5643 {
5644 /* Found a name for the number - there can be only one; duplicate
5645 names for different numbers are allowed, but not vice versa. First
5646 scan down for duplicates. */
5647
5648 slotB = slotA;
5649 while (slotB > name_table)
5650 {
5651 slotB -= name_entry_size;
5652 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5653 {
5654 condition = GET2(slotB, 0) == group_num;
5655 if (condition) break;
5656 }
5657 else break;
5658 }
5659
5660 /* Scan up for duplicates */
5661 if (!condition)
5662 {
5663 slotB = slotA;
5664 for (i++; i < name_count; i++)
5665 {
5666 slotB += name_entry_size;
5667 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5668 {
5669 condition = GET2(slotB, 0) == group_num;
5670 if (condition) break;
5671 }
5672 else break;
5673 }
5674 }
5675 }
5676 return condition;
5677 }
5678
5679 /*
5680 Handling bracketed expressions is probably the most complex part.
5681
5682 Stack layout naming characters:
5683 S - Push the current STR_PTR
5684 0 - Push a 0 (NULL)
5685 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5686 before the next alternative. Not pushed if there are no alternatives.
5687 M - Any values pushed by the current alternative. Can be empty, or anything.
5688 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5689 L - Push the previous local (pointed by localptr) to the stack
5690 () - optional values stored on the stack
5691 ()* - optional, can be stored multiple times
5692
5693 The following list shows the regular expression templates, their PCRE byte codes
5694 and stack layout supported by pcre-sljit.
5695
5696 (?:) OP_BRA | OP_KET A M
5697 () OP_CBRA | OP_KET C M
5698 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5699 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5700 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5701 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5702 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5703 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5704 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5705 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5706 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5707 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5708 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5709 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5710 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5711 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5712 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5713 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5714 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5715 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
5716 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
5717 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
5718
5719
5720 Stack layout naming characters:
5721 A - Push the alternative index (starting from 0) on the stack.
5722 Not pushed if there are no alternatives.
5723 M - Any values pushed by the current alternative. Can be empty, or anything.
5724
5725 The next list shows the possible content of a bracket:
5726 (|) OP_*BRA | OP_ALT ... M A
5727 (?()|) OP_*COND | OP_ALT M A
5728 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
5729 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
5730 Or nothing, if trace is unnecessary
5731 */
5732
5733 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5734 {
5735 DEFINE_COMPILER;
5736 backtrack_common *backtrack;
5737 pcre_uchar opcode;
5738 int private_data_ptr = 0;
5739 int offset = 0;
5740 int stacksize;
5741 pcre_uchar *ccbegin;
5742 pcre_uchar *matchingpath;
5743 pcre_uchar bra = OP_BRA;
5744 pcre_uchar ket;
5745 assert_backtrack *assert;
5746 BOOL has_alternatives;
5747 struct sljit_jump *jump;
5748 struct sljit_jump *skip;
5749 struct sljit_label *rmaxlabel = NULL;
5750 struct sljit_jump *braminzerojump = NULL;
5751
5752 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5753
5754 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5755 {
5756 bra = *cc;
5757 cc++;
5758 opcode = *cc;
5759 }
5760
5761 opcode = *cc;
5762 ccbegin = cc;
5763 matchingpath = ccbegin + 1 + LINK_SIZE;
5764
5765 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
5766 {
5767 /* Drop this bracket_backtrack. */
5768 parent->top = backtrack->prev;
5769 return bracketend(cc);
5770 }
5771
5772 ket = *(bracketend(cc) - 1 - LINK_SIZE);
5773 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
5774 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
5775 cc += GET(cc, 1);
5776
5777 has_alternatives = *cc == OP_ALT;
5778 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5779 {
5780 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
5781 if (*matchingpath == OP_NRREF)
5782 {
5783 stacksize = GET2(matchingpath, 1);
5784 if (common->currententry == NULL || stacksize == RREF_ANY)
5785 has_alternatives = FALSE;
5786 else if (common->currententry->start == 0)
5787 has_alternatives = stacksize != 0;
5788 else
5789 has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5790 }
5791 }
5792
5793 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5794 opcode = OP_SCOND;
5795 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5796 opcode = OP_ONCE;
5797
5798 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5799 {
5800 /* Capturing brackets has a pre-allocated space. */
5801 offset = GET2(ccbegin, 1 + LINK_SIZE);
5802 if (common->optimized_cbracket[offset] == 0)
5803 {
5804 private_data_ptr = OVECTOR_PRIV(offset);
5805 offset <<= 1;
5806 }
5807 else
5808 {
5809 offset <<= 1;
5810 private_data_ptr = OVECTOR(offset);
5811 }
5812 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5813 matchingpath += IMM2_SIZE;
5814 }
5815 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
5816 {
5817 /* Other brackets simply allocate the next entry. */
5818 private_data_ptr = PRIVATE_DATA(ccbegin);
5819 SLJIT_ASSERT(private_data_ptr != 0);
5820 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5821 if (opcode == OP_ONCE)
5822 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
5823 }
5824
5825 /* Instructions before the first alternative. */
5826 stacksize = 0;
5827 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5828 stacksize++;
5829 if (bra == OP_BRAZERO)
5830 stacksize++;
5831
5832 if (stacksize > 0)
5833 allocate_stack(common, stacksize);
5834
5835 stacksize = 0;
5836 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5837 {
5838 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5839 stacksize++;
5840 }
5841
5842 if (bra == OP_BRAZERO)
5843 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5844
5845 if (bra == OP_BRAMINZERO)
5846 {
5847 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
5848 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5849 if (ket != OP_KETRMIN)
5850 {
5851 free_stack(common, 1);
5852 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5853 }
5854 else
5855 {
5856 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5857 {
5858 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5859 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5860 /* Nothing stored during the first run. */
5861 skip = JUMP(SLJIT_JUMP);
5862 JUMPHERE(jump);
5863 /* Checking zero-length iteration. */
5864 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5865 {
5866 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
5867 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5868 }
5869 else
5870 {
5871 /* Except when the whole stack frame must be saved. */
5872 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5873 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
5874 }
5875 JUMPHERE(skip);
5876 }
5877 else
5878 {
5879 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5880 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5881 JUMPHERE(jump);
5882 }
5883 }
5884 }
5885
5886 if (ket == OP_KETRMIN)
5887 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
5888
5889 if (ket == OP_KETRMAX)
5890 {
5891 rmaxlabel = LABEL();
5892 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
5893 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
5894 }
5895
5896 /* Handling capturing brackets and alternatives. */
5897 if (opcode == OP_ONCE)
5898 {
5899 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5900 {
5901 /* Neither capturing brackets nor recursions are found in the block. */
5902 if (ket == OP_KETRMIN)
5903 {
5904 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5905 allocate_stack(common, 2);
5906 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5907 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5908 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5909 }
5910 else if (ket == OP_KETRMAX || has_alternatives)
5911 {
5912 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5913 allocate_stack(common, 1);
5914 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5915 }
5916 else
5917 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5918 }
5919 else
5920 {
5921 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
5922 {
5923 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
5924 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5925 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
5926 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5927 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5928 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5929 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
5930 }
5931 else
5932 {
5933 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
5934 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5935 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
5936 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5937 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5938 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
5939 }
5940 }
5941 }
5942 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
5943 {
5944 /* Saving the previous values. */
5945 if (common->optimized_cbracket[offset >> 1] != 0)
5946 {
5947 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
5948 allocate_stack(common, 2);
5949 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5950 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
5951 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5952 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5953 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5954 }
5955 else
5956 {
5957 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5958 allocate_stack(common, 1);
5959 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5960 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5961 }
5962 }
5963 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5964 {
5965 /* Saving the previous value. */
5966 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5967 allocate_stack(common, 1);
5968 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5969 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5970 }
5971 else if (has_alternatives)
5972 {
5973 /* Pushing the starting string pointer. */
5974 allocate_stack(common, 1);
5975 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5976 }
5977
5978 /* Generating code for the first alternative. */
5979 if (opcode == OP_COND || opcode == OP_SCOND)
5980 {
5981 if (*matchingpath == OP_CREF)
5982 {
5983 SLJIT_ASSERT(has_alternatives);
5984 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
5985 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5986 matchingpath += 1 + IMM2_SIZE;
5987 }
5988 else if (*matchingpath == OP_NCREF)
5989 {
5990 SLJIT_ASSERT(has_alternatives);
5991 stacksize = GET2(matchingpath, 1);
5992 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5993
5994 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5995 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5996 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5997 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
5998 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
5999 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6000 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
6001 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6002 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6003
6004 JUMPHERE(jump);
6005 matchingpath += 1 + IMM2_SIZE;
6006 }
6007 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
6008 {
6009 /* Never has other case. */
6010 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
6011
6012 stacksize = GET2(matchingpath, 1);
6013 if (common->currententry == NULL)
6014 stacksize = 0;
6015 else if (stacksize == RREF_ANY)
6016 stacksize = 1;
6017 else if (common->currententry->start == 0)
6018 stacksize = stacksize == 0;
6019 else
6020 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6021
6022 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
6023 {
6024 SLJIT_ASSERT(!has_alternatives);
6025 if (stacksize != 0)
6026 matchingpath += 1 + IMM2_SIZE;
6027 else
6028 {
6029 if (*cc == OP_ALT)
6030 {
6031 matchingpath = cc + 1 + LINK_SIZE;
6032 cc += GET(cc, 1);
6033 }
6034 else
6035 matchingpath = cc;
6036 }
6037 }
6038 else
6039 {
6040 SLJIT_ASSERT(has_alternatives);
6041
6042 stacksize = GET2(matchingpath, 1);
6043 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6044 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6045 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6046 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
6047 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
6048 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6049 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6050 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
6051 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6052 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6053 matchingpath += 1 + IMM2_SIZE;
6054 }
6055 }
6056 else
6057 {
6058 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
6059 /* Similar code as PUSH_BACKTRACK macro. */
6060 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
6061 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6062 return NULL;
6063 memset(assert, 0, sizeof(assert_backtrack));
6064 assert->common.cc = matchingpath;
6065 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
6066 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
6067 }
6068 }
6069
6070 compile_matchingpath(common, matchingpath, cc, backtrack);
6071 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6072 return NULL;
6073
6074 if (opcode == OP_ONCE)
6075 {
6076 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6077 {
6078 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6079 /* TMP2, which is set here, is used by OP_KETRMAX below. */
6080 if (ket == OP_KETRMAX)
6081 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6082 else if (ket == OP_KETRMIN)
6083 {
6084 /* Move the STR_PTR to the private_data_ptr. */
6085 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6086 }
6087 }
6088 else
6089 {
6090 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
6091 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_sw));
6092 if (ket == OP_KETRMAX)
6093 {
6094 /* TMP2, which is set here, is used by OP_KETRMAX below. */
6095 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6096 }
6097 }
6098 }
6099
6100 stacksize = 0;
6101 if (ket != OP_KET || bra != OP_BRA)
6102 stacksize++;
6103 if (offset != 0)
6104 {
6105 if (common->capture_last_ptr != 0)
6106 stacksize++;
6107 if (common->optimized_cbracket[offset >> 1] == 0)
6108 stacksize += 2;
6109 }
6110 if (has_alternatives && opcode != OP_ONCE)
6111 stacksize++;
6112
6113 if (stacksize > 0)
6114 allocate_stack(common, stacksize);
6115
6116 stacksize = 0;
6117 if (ket != OP_KET || bra != OP_BRA)
6118 {
6119 if (ket != OP_KET)
6120 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6121 else
6122 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6123 stacksize++;
6124 }
6125
6126 if (offset != 0)
6127 {
6128 if (common->capture_last_ptr != 0)
6129 {
6130 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6131 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6132 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0);
6133 stacksize++;
6134 }
6135 if (common->optimized_cbracket[offset >> 1] == 0)
6136 {
6137 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6138 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6139 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6140 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6141 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6142 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6143 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6144 stacksize += 2;
6145 }
6146 }
6147
6148 if (has_alternatives)
6149 {
6150 if (opcode != OP_ONCE)
6151 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6152 if (ket != OP_KETRMAX)
6153 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6154 }
6155
6156 /* Must be after the matchingpath label. */
6157 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6158 {
6159 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6160 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6161 }
6162
6163 if (ket == OP_KETRMAX)
6164 {
6165 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6166 {
6167 if (has_alternatives)
6168 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6169 /* Checking zero-length iteration. */
6170 if (opcode != OP_ONCE)
6171 {
6172 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
6173 /* Drop STR_PTR for greedy plus quantifier. */
6174 if (bra != OP_BRAZERO)
6175 free_stack(common, 1);
6176 }
6177 else
6178 /* TMP2 must contain the starting STR_PTR. */
6179 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
6180 }
6181 else
6182 JUMPTO(SLJIT_JUMP, rmaxlabel);
6183 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6184 }
6185
6186 if (bra == OP_BRAZERO)
6187 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
6188
6189 if (bra == OP_BRAMINZERO)
6190 {
6191 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
6192 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6193 if (braminzerojump != NULL)
6194 {
6195 JUMPHERE(braminzerojump);
6196 /* We need to release the end pointer to perform the
6197 backtrack for the zero-length iteration. When
6198 framesize is < 0, OP_ONCE will do the release itself. */
6199 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
6200 {
6201 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6202 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6203 }
6204 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6205 free_stack(common, 1);
6206 }
6207 /* Continue to the normal backtrack. */
6208 }
6209
6210 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
6211 decrease_call_count(common);
6212
6213 /* Skip the other alternatives. */
6214 while (*cc == OP_ALT)
6215 cc += GET(cc, 1);
6216 cc += 1 + LINK_SIZE;
6217 return cc;
6218 }
6219
6220 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6221 {
6222 DEFINE_COMPILER;
6223 backtrack_common *backtrack;
6224 pcre_uchar opcode;
6225 int private_data_ptr;
6226 int cbraprivptr = 0;
6227 int framesize;
6228 int stacksize;
6229 int offset = 0;
6230 BOOL zero = FALSE;
6231 pcre_uchar *ccbegin = NULL;
6232 int stack;
6233 struct sljit_label *loop = NULL;
6234 struct jump_list *emptymatch = NULL;
6235
6236 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
6237 if (*cc == OP_BRAPOSZERO)
6238 {
6239 zero = TRUE;
6240 cc++;
6241 }
6242
6243 opcode = *cc;
6244 private_data_ptr = PRIVATE_DATA(cc);
6245 SLJIT_ASSERT(private_data_ptr != 0);
6246 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
6247 switch(opcode)
6248 {
6249 case OP_BRAPOS:
6250 case OP_SBRAPOS:
6251 ccbegin = cc + 1 + LINK_SIZE;
6252 break;
6253
6254 case OP_CBRAPOS:
6255 case OP_SCBRAPOS:
6256 offset = GET2(cc, 1 + LINK_SIZE);
6257 /* This case cannot be optimized in the same was as
6258 normal capturing brackets. */
6259 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6260 cbraprivptr = OVECTOR_PRIV(offset);
6261 offset <<= 1;
6262 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6263 break;
6264
6265 default:
6266 SLJIT_ASSERT_STOP();
6267 break;
6268 }
6269
6270 framesize = get_framesize(common, cc, FALSE);
6271 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6272 if (framesize < 0)
6273 {
6274 if (offset != 0)
6275 {
6276 stacksize = 2;
6277 if (common->capture_last_ptr != 0)
6278 stacksize++;
6279 }
6280 else
6281 stacksize = 1;
6282
6283 if (!zero)
6284 stacksize++;
6285
6286 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6287 allocate_stack(common, stacksize);
6288 if (framesize == no_frame)
6289 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6290
6291 if (offset != 0)
6292 {
6293 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6294 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6295 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6296 if (common->capture_last_ptr != 0)
6297 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6298 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6299 if (common->capture_last_ptr != 0)
6300 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6301 }
6302 else
6303 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6304
6305 if (!zero)
6306 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
6307 }
6308 else
6309 {
6310 stacksize = framesize + 1;
6311 if (!zero)
6312 stacksize++;
6313 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6314 stacksize++;
6315 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6316
6317 allocate_stack(common, stacksize);
6318 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6319 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
6320 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6321
6322 stack = 0;
6323 if (!zero)
6324 {
6325 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
6326 stack++;
6327 }
6328 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6329 {
6330 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
6331 stack++;
6332 }
6333 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
6334 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
6335 }
6336
6337 if (offset != 0)
6338 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6339
6340 loop = LABEL();
6341 while (*cc != OP_KETRPOS)
6342 {
6343 backtrack->top = NULL;
6344 backtrack->topbacktracks = NULL;
6345 cc += GET(cc, 1);
6346
6347 compile_matchingpath(common, ccbegin, cc, backtrack);
6348 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6349 return NULL;
6350
6351 if (framesize < 0)
6352 {
6353 if (framesize == no_frame)
6354 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6355
6356 if (offset != 0)
6357 {
6358 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6359 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6360 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6361 if (common->capture_last_ptr != 0)
6362 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6363 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6364 }
6365 else
6366 {
6367 if (opcode == OP_SBRAPOS)
6368 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6369 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6370 }
6371
6372 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6373 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6374
6375 if (!zero)
6376 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6377 }
6378 else
6379 {
6380 if (offset != 0)
6381 {
6382 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6383 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6384 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6385 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6386 if (common->capture_last_ptr != 0)
6387 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6388 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6389 }
6390 else
6391 {
6392 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6393 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6394 if (opcode == OP_SBRAPOS)
6395 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6396 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
6397 }
6398
6399 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6400 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6401
6402 if (!zero)
6403 {
6404 if (framesize < 0)
6405 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6406 else
6407 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6408 }
6409 }
6410 JUMPTO(SLJIT_JUMP, loop);
6411 flush_stubs(common);
6412
6413 compile_backtrackingpath(common, backtrack->top);
6414 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6415 return NULL;
6416 set_jumps(backtrack->topbacktracks, LABEL());
6417
6418 if (framesize < 0)
6419 {
6420 if (offset != 0)
6421 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6422 else
6423 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6424 }
6425 else
6426 {
6427 if (offset != 0)
6428 {
6429 /* Last alternative. */
6430 if (*cc == OP_KETRPOS)
6431 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6432 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6433 }
6434 else
6435 {
6436 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6437 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6438 }
6439 }
6440
6441 if (*cc == OP_KETRPOS)
6442 break;
6443 ccbegin = cc + 1 + LINK_SIZE;
6444 }
6445
6446 backtrack->topbacktracks = NULL;
6447 if (!zero)
6448 {
6449 if (framesize < 0)
6450 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
6451 else /* TMP2 is set to [private_data_ptr] above. */
6452 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
6453 }
6454
6455 /* None of them matched. */
6456 set_jumps(emptymatch, LABEL());
6457 decrease_call_count(common);
6458 return cc + 1 + LINK_SIZE;
6459 }
6460
6461 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
6462 {
6463 int class_len;
6464
6465 *opcode = *cc;
6466 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
6467 {
6468 cc++;
6469 *type = OP_CHAR;
6470 }
6471 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
6472 {
6473 cc++;
6474 *type = OP_CHARI;
6475 *opcode -= OP_STARI - OP_STAR;
6476 }
6477 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
6478 {
6479 cc++;
6480 *type = OP_NOT;
6481 *opcode -= OP_NOTSTAR - OP_STAR;
6482 }
6483 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
6484 {
6485 cc++;
6486 *type = OP_NOTI;
6487 *opcode -= OP_NOTSTARI - OP_STAR;
6488 }
6489 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
6490 {
6491 cc++;
6492 *opcode -= OP_TYPESTAR - OP_STAR;
6493 *type = 0;
6494 }
6495 else
6496 {
6497 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
6498 *type = *opcode;
6499 cc++;
6500 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
6501 *opcode = cc[class_len - 1];
6502 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
6503 {
6504 *opcode -= OP_CRSTAR - OP_STAR;
6505 if (end != NULL)
6506 *end = cc + class_len;
6507 }
6508 else
6509 {
6510 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
6511 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
6512 *arg2 = GET2(cc, class_len);
6513
6514 if (*arg2 == 0)
6515 {
6516 SLJIT_ASSERT(*arg1 != 0);
6517 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
6518 }
6519 if (*arg1 == *arg2)
6520 *opcode = OP_EXACT;
6521
6522 if (end != NULL)
6523 *end = cc + class_len + 2 * IMM2_SIZE;
6524 }
6525 return cc;
6526 }
6527
6528 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
6529 {
6530 *arg1 = GET2(cc, 0);
6531 cc += IMM2_SIZE;
6532 }
6533
6534 if (*type == 0)
6535 {
6536 *type = *cc;
6537 if (end != NULL)
6538 *end = next_opcode(common, cc);
6539 cc++;
6540 return cc;
6541 }
6542
6543 if (end != NULL)
6544 {
6545 *end = cc + 1;
6546 #ifdef SUPPORT_UTF
6547 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
6548 #endif
6549 }
6550 return cc;
6551 }
6552
6553 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6554 {
6555 DEFINE_COMPILER;
6556 backtrack_common *backtrack;
6557 pcre_uchar opcode;
6558 pcre_uchar type;
6559 int arg1 = -1, arg2 = -1;
6560 pcre_uchar* end;
6561 jump_list *nomatch = NULL;
6562 struct sljit_jump *jump = NULL;
6563 struct sljit_label *label;
6564 int private_data_ptr = PRIVATE_DATA(cc);
6565 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6566 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
6567 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
6568 int tmp_base, tmp_offset;
6569
6570 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6571
6572 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
6573
6574 switch(type)
6575 {
6576 case OP_NOT_DIGIT:
6577 case OP_DIGIT:
6578 case OP_NOT_WHITESPACE:
6579 case OP_WHITESPACE:
6580 case OP_NOT_WORDCHAR:
6581 case OP_WORDCHAR:
6582 case OP_ANY:
6583 case OP_ALLANY:
6584 case OP_ANYBYTE:
6585 case OP_ANYNL:
6586 case OP_NOT_HSPACE:
6587 case OP_HSPACE:
6588 case OP_NOT_VSPACE:
6589 case OP_VSPACE:
6590 case OP_CHAR:
6591 case OP_CHARI:
6592 case OP_NOT:
6593 case OP_NOTI:
6594 case OP_CLASS:
6595 case OP_NCLASS:
6596 tmp_base = TMP3;
6597 tmp_offset = 0;
6598 break;
6599
6600 default:
6601 SLJIT_ASSERT_STOP();
6602 /* Fall through. */
6603
6604 case OP_EXTUNI:
6605 case OP_XCLASS:
6606 case OP_NOTPROP:
6607 case OP_PROP:
6608 tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
6609 tmp_offset = POSSESSIVE0;
6610 break;
6611 }
6612
6613 switch(opcode)
6614 {
6615 case OP_STAR:
6616 case OP_PLUS:
6617 case OP_UPTO:
6618 case OP_CRRANGE:
6619 if (type == OP_ANYNL || type == OP_EXTUNI)
6620 {
6621 SLJIT_ASSERT(private_data_ptr == 0);
6622 if (opcode == OP_STAR || opcode == OP_UPTO)
6623 {
6624 allocate_stack(common, 2);
6625 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6626 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6627 }
6628 else
6629 {
6630 allocate_stack(common, 1);
6631 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6632 }
6633
6634 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6635 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6636
6637 label = LABEL();
6638 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6639 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6640 {
6641 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6642 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6643 if (opcode == OP_CRRANGE && arg2 > 0)
6644 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
6645 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
6646 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
6647 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6648 }
6649
6650 /* We cannot use TMP3 because of this allocate_stack. */
6651 allocate_stack(common, 1);
6652 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6653 JUMPTO(SLJIT_JUMP, label);
6654 if (jump != NULL)
6655 JUMPHERE(jump);
6656 }
6657 else
6658 {
6659 if (opcode == OP_PLUS)
6660 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6661 if (private_data_ptr == 0)
6662 allocate_stack(common, 2);
6663 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6664 if (opcode <= OP_PLUS)
6665 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
6666 else
6667 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6668 label = LABEL();
6669 compile_char1_matchingpath(common, type, cc, &nomatch);
6670 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6671 if (opcode <= OP_PLUS)
6672 JUMPTO(SLJIT_JUMP, label);
6673 else if (opcode == OP_CRRANGE && arg1 == 0)
6674 {
6675 OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
6676 JUMPTO(SLJIT_JUMP, label);
6677 }
6678 else
6679 {
6680 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6681 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6682 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6683 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
6684 }
6685 set_jumps(nomatch, LABEL());
6686 if (opcode == OP_CRRANGE)
6687 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
6688 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6689 }
6690 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6691 break;
6692
6693 case OP_MINSTAR:
6694 case OP_MINPLUS:
6695 if (opcode == OP_MINPLUS)
6696 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6697 if (private_data_ptr == 0)
6698 allocate_stack(common, 1);
6699 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6700 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6701 break;
6702
6703 case OP_MINUPTO:
6704 case OP_CRMINRANGE:
6705 if (private_data_ptr == 0)
6706 allocate_stack(common, 2);
6707 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6708 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6709 if (opcode == OP_CRMINRANGE)
6710 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6711 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6712 break;
6713
6714 case OP_QUERY:
6715 case OP_MINQUERY:
6716 if (private_data_ptr == 0)
6717 allocate_stack(common, 1);
6718 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6719 if (opcode == OP_QUERY)
6720 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6721 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6722 break;
6723
6724 case OP_EXACT:
6725 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1);
6726 label = LABEL();
6727 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6728 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
6729 JUMPTO(SLJIT_C_NOT_ZERO, label);
6730 break;
6731
6732 case OP_POSSTAR:
6733 case OP_POSPLUS:
6734 case OP_POSUPTO:
6735 if (opcode == OP_POSPLUS)
6736 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6737 if (opcode == OP_POSUPTO)
6738 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1);
6739 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6740 label = LABEL();
6741 compile_char1_matchingpath(common, type, cc, &nomatch);
6742 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6743 if (opcode != OP_POSUPTO)
6744 JUMPTO(SLJIT_JUMP, label);
6745 else