211 |
|
|
212 |
/* The following structure is the key data type for the recursive |
/* The following structure is the key data type for the recursive |
213 |
code generator. It is allocated by compile_matchingpath, and contains |
code generator. It is allocated by compile_matchingpath, and contains |
214 |
the aguments for compile_backtrackingpath. Must be the first member |
the arguments for compile_backtrackingpath. Must be the first member |
215 |
of its descendants. */ |
of its descendants. */ |
216 |
typedef struct backtrack_common { |
typedef struct backtrack_common { |
217 |
/* Concatenation stack. */ |
/* Concatenation stack. */ |
306 |
int framesize; |
int framesize; |
307 |
} then_trap_backtrack; |
} then_trap_backtrack; |
308 |
|
|
309 |
#define MAX_RANGE_SIZE 6 |
#define MAX_RANGE_SIZE 4 |
310 |
|
|
311 |
typedef struct compiler_common { |
typedef struct compiler_common { |
312 |
/* The sljit generic compiler. */ |
/* The sljit generic compiler. */ |
369 |
int endonly; |
int endonly; |
370 |
/* Tables. */ |
/* Tables. */ |
371 |
sljit_sw ctypes; |
sljit_sw ctypes; |
|
int digits[2 + MAX_RANGE_SIZE]; |
|
372 |
/* Named capturing brackets. */ |
/* Named capturing brackets. */ |
373 |
sljit_uw name_table; |
pcre_uchar *name_table; |
374 |
sljit_sw name_count; |
sljit_sw name_count; |
375 |
sljit_sw name_entry_size; |
sljit_sw name_entry_size; |
376 |
|
|
407 |
jump_list *utfreadchar; |
jump_list *utfreadchar; |
408 |
#endif |
#endif |
409 |
#ifdef COMPILE_PCRE8 |
#ifdef COMPILE_PCRE8 |
410 |
|
jump_list *utfreadchar11; |
411 |
jump_list *utfreadtype8; |
jump_list *utfreadtype8; |
412 |
#endif |
#endif |
413 |
#endif /* SUPPORT_UTF */ |
#endif /* SUPPORT_UTF */ |
481 |
group contains the start / end character pointers, and the second is |
group contains the start / end character pointers, and the second is |
482 |
the start pointers when the end of the capturing group has not yet been reached. */ |
the start pointers when the end of the capturing group has not yet been reached. */ |
483 |
#define OVECTOR_START (common->ovector_start) |
#define OVECTOR_START (common->ovector_start) |
484 |
#define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw)) |
#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw)) |
485 |
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * sizeof(sljit_sw)) |
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw)) |
486 |
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start]) |
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start]) |
487 |
|
|
488 |
#if defined COMPILE_PCRE8 |
#if defined COMPILE_PCRE8 |
533 |
return cc; |
return cc; |
534 |
} |
} |
535 |
|
|
536 |
|
static int ones_in_half_byte[16] = { |
537 |
|
/* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3, |
538 |
|
/* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4 |
539 |
|
}; |
540 |
|
|
541 |
/* Functions which might need modification for all newly supported opcodes: |
/* Functions which might need modification for all newly supported opcodes: |
542 |
next_opcode |
next_opcode |
543 |
check_opcode_types |
check_opcode_types |
590 |
case OP_CRMINQUERY: |
case OP_CRMINQUERY: |
591 |
case OP_CRRANGE: |
case OP_CRRANGE: |
592 |
case OP_CRMINRANGE: |
case OP_CRMINRANGE: |
593 |
|
case OP_CRPOSSTAR: |
594 |
|
case OP_CRPOSPLUS: |
595 |
|
case OP_CRPOSQUERY: |
596 |
|
case OP_CRPOSRANGE: |
597 |
case OP_CLASS: |
case OP_CLASS: |
598 |
case OP_NCLASS: |
case OP_NCLASS: |
599 |
case OP_REF: |
case OP_REF: |
600 |
case OP_REFI: |
case OP_REFI: |
601 |
|
case OP_DNREF: |
602 |
|
case OP_DNREFI: |
603 |
case OP_RECURSE: |
case OP_RECURSE: |
604 |
case OP_CALLOUT: |
case OP_CALLOUT: |
605 |
case OP_ALT: |
case OP_ALT: |
625 |
case OP_SCBRAPOS: |
case OP_SCBRAPOS: |
626 |
case OP_SCOND: |
case OP_SCOND: |
627 |
case OP_CREF: |
case OP_CREF: |
628 |
case OP_NCREF: |
case OP_DNCREF: |
629 |
case OP_RREF: |
case OP_RREF: |
630 |
case OP_NRREF: |
case OP_DNRREF: |
631 |
case OP_DEF: |
case OP_DEF: |
632 |
case OP_BRAZERO: |
case OP_BRAZERO: |
633 |
case OP_BRAMINZERO: |
case OP_BRAMINZERO: |
747 |
|
|
748 |
static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend) |
static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend) |
749 |
{ |
{ |
750 |
pcre_uchar *name; |
int count; |
751 |
pcre_uchar *name2; |
pcre_uchar *slot; |
|
unsigned int cbra_index; |
|
|
int i; |
|
752 |
|
|
753 |
/* Calculates important variables (like stack size) and checks whether all opcodes are supported. */ |
/* Calculates important variables (like stack size) and checks whether all opcodes are supported. */ |
754 |
while (cc < ccend) |
while (cc < ccend) |
782 |
break; |
break; |
783 |
|
|
784 |
case OP_CREF: |
case OP_CREF: |
785 |
i = GET2(cc, 1); |
common->optimized_cbracket[GET2(cc, 1)] = 0; |
|
common->optimized_cbracket[i] = 0; |
|
786 |
cc += 1 + IMM2_SIZE; |
cc += 1 + IMM2_SIZE; |
787 |
break; |
break; |
788 |
|
|
789 |
case OP_NCREF: |
case OP_DNREF: |
790 |
cbra_index = GET2(cc, 1); |
case OP_DNREFI: |
791 |
name = (pcre_uchar *)common->name_table; |
case OP_DNCREF: |
792 |
name2 = name; |
count = GET2(cc, 1 + IMM2_SIZE); |
793 |
for (i = 0; i < common->name_count; i++) |
slot = common->name_table + GET2(cc, 1) * common->name_entry_size; |
794 |
{ |
while (count-- > 0) |
795 |
if (GET2(name, 0) == cbra_index) break; |
{ |
796 |
name += common->name_entry_size; |
common->optimized_cbracket[GET2(slot, 0)] = 0; |
797 |
} |
slot += common->name_entry_size; |
|
SLJIT_ASSERT(i != common->name_count); |
|
|
|
|
|
for (i = 0; i < common->name_count; i++) |
|
|
{ |
|
|
if (STRCMP_UC_UC(name2 + IMM2_SIZE, name + IMM2_SIZE) == 0) |
|
|
common->optimized_cbracket[GET2(name2, 0)] = 0; |
|
|
name2 += common->name_entry_size; |
|
798 |
} |
} |
799 |
cc += 1 + IMM2_SIZE; |
cc += 1 + 2 * IMM2_SIZE; |
800 |
break; |
break; |
801 |
|
|
802 |
case OP_RECURSE: |
case OP_RECURSE: |
2375 |
|
|
2376 |
static void check_partial(compiler_common *common, BOOL force) |
static void check_partial(compiler_common *common, BOOL force) |
2377 |
{ |
{ |
2378 |
/* Checks whether a partial matching is occured. Does not modify registers. */ |
/* Checks whether a partial matching is occurred. Does not modify registers. */ |
2379 |
DEFINE_COMPILER; |
DEFINE_COMPILER; |
2380 |
struct sljit_jump *jump = NULL; |
struct sljit_jump *jump = NULL; |
2381 |
|
|
2472 |
#endif |
#endif |
2473 |
|
|
2474 |
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); |
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); |
2475 |
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
2476 |
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 |
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 |
2477 |
if (common->utf) |
if (common->utf) |
2478 |
{ |
{ |
2485 |
JUMPHERE(jump); |
JUMPHERE(jump); |
2486 |
} |
} |
2487 |
#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */ |
#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */ |
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
|
2488 |
} |
} |
2489 |
|
|
2490 |
static void peek_char(compiler_common *common) |
static void peek_char(compiler_common *common) |
2505 |
#elif defined COMPILE_PCRE16 |
#elif defined COMPILE_PCRE16 |
2506 |
jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800); |
jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800); |
2507 |
#endif /* COMPILE_PCRE[8|16] */ |
#endif /* COMPILE_PCRE[8|16] */ |
2508 |
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
2509 |
add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); |
add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); |
2510 |
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
2511 |
JUMPHERE(jump); |
JUMPHERE(jump); |
2513 |
#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */ |
#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */ |
2514 |
} |
} |
2515 |
|
|
2516 |
static void read_char8_type(compiler_common *common) |
#if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
2517 |
|
|
2518 |
|
static BOOL is_char7_bitset(const pcre_uint8* bitset, BOOL nclass) |
2519 |
|
{ |
2520 |
|
const pcre_uint8 value = nclass ? 0xff : 0; |
2521 |
|
const pcre_uint8* end = bitset + 32; |
2522 |
|
|
2523 |
|
bitset += 16; |
2524 |
|
do |
2525 |
|
{ |
2526 |
|
if (*bitset++ != value) |
2527 |
|
return FALSE; |
2528 |
|
} |
2529 |
|
while (bitset < end); |
2530 |
|
return TRUE; |
2531 |
|
} |
2532 |
|
|
2533 |
|
static void read_char7_type(compiler_common *common, BOOL full_read) |
2534 |
|
{ |
2535 |
|
/* Reads the precise character type of a character into TMP1, if the character is |
2536 |
|
less than 128. Otherwise it returns with zero. */ |
2537 |
|
DEFINE_COMPILER; |
2538 |
|
struct sljit_jump *jump; |
2539 |
|
|
2540 |
|
SLJIT_ASSERT(common->utf); |
2541 |
|
|
2542 |
|
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); |
2543 |
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
2544 |
|
|
2545 |
|
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); |
2546 |
|
|
2547 |
|
if (full_read) |
2548 |
|
{ |
2549 |
|
jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0); |
2550 |
|
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); |
2551 |
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
2552 |
|
JUMPHERE(jump); |
2553 |
|
} |
2554 |
|
} |
2555 |
|
|
2556 |
|
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */ |
2557 |
|
|
2558 |
|
static void read_char_max(compiler_common *common, pcre_uint32 max, BOOL full_read) |
2559 |
|
{ |
2560 |
|
/* Reads the precise value of a character into TMP1, if the character is |
2561 |
|
less than or equal to max. Otherwise it returns with a value greater than max. */ |
2562 |
|
DEFINE_COMPILER; |
2563 |
|
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 |
2564 |
|
struct sljit_jump *jump; |
2565 |
|
#endif |
2566 |
|
|
2567 |
|
SLJIT_UNUSED_ARG(full_read); |
2568 |
|
SLJIT_UNUSED_ARG(max); |
2569 |
|
|
2570 |
|
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); |
2571 |
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
2572 |
|
|
2573 |
|
#if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
2574 |
|
if (common->utf) |
2575 |
|
{ |
2576 |
|
if (max < 128 && !full_read) |
2577 |
|
return; |
2578 |
|
|
2579 |
|
jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); |
2580 |
|
if (max < 128) |
2581 |
|
{ |
2582 |
|
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); |
2583 |
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
2584 |
|
} |
2585 |
|
else if (max < 0x400) |
2586 |
|
{ |
2587 |
|
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
2588 |
|
if (!full_read) |
2589 |
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
2590 |
|
else |
2591 |
|
OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); |
2592 |
|
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); |
2593 |
|
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); |
2594 |
|
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
2595 |
|
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
2596 |
|
if (full_read) |
2597 |
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0); |
2598 |
|
} |
2599 |
|
else |
2600 |
|
add_jump(compiler, (max < 0x800) ? &common->utfreadchar11 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); |
2601 |
|
JUMPHERE(jump); |
2602 |
|
} |
2603 |
|
#endif |
2604 |
|
|
2605 |
|
#if defined SUPPORT_UTF && defined COMPILE_PCRE16 |
2606 |
|
if (common->utf) |
2607 |
|
{ |
2608 |
|
if (max < 0xd800 && !full_read) |
2609 |
|
return; |
2610 |
|
|
2611 |
|
if (max >= 0x10000) |
2612 |
|
{ |
2613 |
|
jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800); |
2614 |
|
add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); |
2615 |
|
JUMPHERE(jump); |
2616 |
|
return; |
2617 |
|
} |
2618 |
|
|
2619 |
|
/* Skip low surrogate if necessary. */ |
2620 |
|
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); |
2621 |
|
jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1); |
2622 |
|
if (full_read) |
2623 |
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
2624 |
|
if (max >= 0xd800) |
2625 |
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000); |
2626 |
|
JUMPHERE(jump); |
2627 |
|
} |
2628 |
|
#endif |
2629 |
|
} |
2630 |
|
|
2631 |
|
static void read_char8_type(compiler_common *common, BOOL full_read) |
2632 |
{ |
{ |
2633 |
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */ |
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */ |
2634 |
DEFINE_COMPILER; |
DEFINE_COMPILER; |
2635 |
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 |
2636 |
struct sljit_jump *jump; |
struct sljit_jump *jump; |
2637 |
#endif |
#endif |
2638 |
|
#if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
2639 |
|
struct sljit_jump *jump2; |
2640 |
|
#endif |
2641 |
|
|
2642 |
#ifdef SUPPORT_UTF |
SLJIT_UNUSED_ARG(full_read); |
2643 |
|
|
2644 |
|
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); |
2645 |
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
2646 |
|
|
2647 |
|
#if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
2648 |
if (common->utf) |
if (common->utf) |
2649 |
{ |
{ |
|
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); |
|
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
|
|
#if defined COMPILE_PCRE8 |
|
2650 |
/* This can be an extra read in some situations, but hopefully |
/* This can be an extra read in some situations, but hopefully |
2651 |
it is needed in most cases. */ |
it is needed in most cases. */ |
2652 |
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); |
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); |
2653 |
jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0); |
jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0); |
2654 |
add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL)); |
if (!full_read) |
2655 |
JUMPHERE(jump); |
{ |
2656 |
#elif defined COMPILE_PCRE16 |
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
2657 |
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); |
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
2658 |
jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); |
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
2659 |
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); |
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); |
2660 |
JUMPHERE(jump); |
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); |
2661 |
/* Skip low surrogate if necessary. */ |
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0); |
2662 |
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00); |
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); |
2663 |
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800); |
jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); |
2664 |
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); |
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); |
2665 |
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); |
JUMPHERE(jump2); |
2666 |
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
} |
2667 |
#elif defined COMPILE_PCRE32 |
else |
2668 |
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); |
add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL)); |
|
jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); |
|
|
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); |
|
2669 |
JUMPHERE(jump); |
JUMPHERE(jump); |
|
#endif /* COMPILE_PCRE[8|16|32] */ |
|
2670 |
return; |
return; |
2671 |
} |
} |
2672 |
#endif /* SUPPORT_UTF */ |
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */ |
2673 |
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); |
|
2674 |
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
#if !defined COMPILE_PCRE8 |
|
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
|
2675 |
/* The ctypes array contains only 256 values. */ |
/* The ctypes array contains only 256 values. */ |
2676 |
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); |
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); |
2677 |
jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); |
jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); |
2678 |
#endif |
#endif |
2679 |
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); |
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); |
2680 |
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
#if !defined COMPILE_PCRE8 |
2681 |
JUMPHERE(jump); |
JUMPHERE(jump); |
2682 |
#endif |
#endif |
2683 |
|
|
2684 |
|
#if defined SUPPORT_UTF && defined COMPILE_PCRE16 |
2685 |
|
if (common->utf && full_read) |
2686 |
|
{ |
2687 |
|
/* Skip low surrogate if necessary. */ |
2688 |
|
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800); |
2689 |
|
jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1); |
2690 |
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
2691 |
|
JUMPHERE(jump); |
2692 |
|
} |
2693 |
|
#endif /* SUPPORT_UTF && COMPILE_PCRE16 */ |
2694 |
} |
} |
2695 |
|
|
2696 |
static void skip_char_back(compiler_common *common) |
static void skip_char_back(compiler_common *common) |
2768 |
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20); |
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20); |
2769 |
jump = JUMP(SLJIT_C_NOT_ZERO); |
jump = JUMP(SLJIT_C_NOT_ZERO); |
2770 |
/* Two byte sequence. */ |
/* Two byte sequence. */ |
2771 |
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); |
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
2772 |
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
2773 |
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f); |
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f); |
2774 |
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); |
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); |
2775 |
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
2776 |
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
2777 |
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); |
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2)); |
2778 |
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
2779 |
JUMPHERE(jump); |
JUMPHERE(jump); |
2780 |
|
|
2781 |
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10); |
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10); |
2782 |
jump = JUMP(SLJIT_C_NOT_ZERO); |
jump = JUMP(SLJIT_C_NOT_ZERO); |
2783 |
/* Three byte sequence. */ |
/* Three byte sequence. */ |
2784 |
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); |
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
2785 |
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f); |
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f); |
2786 |
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12); |
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12); |
2787 |
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
2788 |
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); |
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); |
2789 |
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
2790 |
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); |
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); |
2791 |
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); |
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); |
2792 |
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
2793 |
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
2794 |
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2)); |
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3)); |
2795 |
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
2796 |
JUMPHERE(jump); |
JUMPHERE(jump); |
2797 |
|
|
2798 |
/* Four byte sequence. */ |
/* Four byte sequence. */ |
2799 |
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); |
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
2800 |
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07); |
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07); |
2801 |
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18); |
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18); |
2802 |
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
2803 |
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12); |
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12); |
2804 |
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
2805 |
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); |
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); |
2806 |
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
2807 |
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); |
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); |
2808 |
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
2809 |
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3)); |
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); |
2810 |
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); |
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); |
2811 |
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
2812 |
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
2813 |
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3)); |
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4)); |
2814 |
|
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
2815 |
|
} |
2816 |
|
|
2817 |
|
static void do_utfreadchar11(compiler_common *common) |
2818 |
|
{ |
2819 |
|
/* Fast decoding of a UTF-8 character. TMP1 contains the first byte
2820 |
|
of the character (>= 0xc0). Return value in TMP1. */ |
2821 |
|
DEFINE_COMPILER; |
2822 |
|
struct sljit_jump *jump; |
2823 |
|
|
2824 |
|
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); |
2825 |
|
|
2826 |
|
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20); |
2827 |
|
jump = JUMP(SLJIT_C_NOT_ZERO); |
2828 |
|
/* Two byte sequence. */ |
2829 |
|
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
2830 |
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
2831 |
|
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f); |
2832 |
|
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); |
2833 |
|
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
2834 |
|
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
2835 |
|
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
2836 |
|
|
2837 |
|
JUMPHERE(jump); |
2838 |
|
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); |
2839 |
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x800); |
2840 |
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
2841 |
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
2842 |
} |
} |
2843 |
|
|
2857 |
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
2858 |
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
2859 |
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f); |
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f); |
2860 |
|
/* The upper 5 bits are known at this point. */ |
2861 |
|
compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3); |
2862 |
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); |
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); |
2863 |
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); |
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); |
2864 |
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0); |
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0); |
|
compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); |
|
2865 |
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); |
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); |
2866 |
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
2867 |
|
|
2868 |
JUMPHERE(compare); |
JUMPHERE(compare); |
2869 |
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); |
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); |
2870 |
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
|
JUMPHERE(jump); |
|
2871 |
|
|
2872 |
/* We only have types for characters less than 256. */ |
/* We only have types for characters less than 256. */ |
2873 |
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); |
JUMPHERE(jump); |
2874 |
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); |
2875 |
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); |
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); |
2876 |
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
2877 |
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
2878 |
} |
} |
2879 |
|
|
2893 |
|
|
2894 |
JUMPHERE(jump); |
JUMPHERE(jump); |
2895 |
/* Combine two 16 bit characters. */ |
/* Combine two 16 bit characters. */ |
2896 |
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); |
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
2897 |
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
2898 |
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff); |
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff); |
2899 |
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10); |
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10); |
2900 |
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff); |
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff); |
2901 |
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
2902 |
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); |
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2)); |
2903 |
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000); |
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000); |
2904 |
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
2905 |
} |
} |
3055 |
return mainloop; |
return mainloop; |
3056 |
} |
} |
3057 |
|
|
3058 |
#define MAX_N_CHARS 3 |
static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, int max_chars) |
|
|
|
|
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline) |
|
3059 |
{ |
{ |
3060 |
DEFINE_COMPILER; |
/* Recursive function, which scans prefix literals. */ |
3061 |
struct sljit_label *start; |
int len, repeat, len_save, consumed = 0; |
3062 |
struct sljit_jump *quit; |
pcre_uint32 caseless, chr, mask; |
3063 |
pcre_uint32 chars[MAX_N_CHARS * 2]; |
pcre_uchar *alternative, *cc_save; |
3064 |
pcre_uchar *cc = common->start + 1 + LINK_SIZE; |
BOOL last, any; |
|
int location = 0; |
|
|
pcre_int32 len, c, bit, caseless; |
|
|
int must_stop; |
|
|
|
|
|
/* We do not support alternatives now. */ |
|
|
if (*(common->start + GET(common->start, 1)) == OP_ALT) |
|
|
return FALSE; |
|
3065 |
|
|
3066 |
|
repeat = 1; |
3067 |
while (TRUE) |
while (TRUE) |
3068 |
{ |
{ |
3069 |
|
last = TRUE; |
3070 |
|
any = FALSE; |
3071 |
caseless = 0; |
caseless = 0; |
3072 |
must_stop = 1; |
switch (*cc) |
|
switch(*cc) |
|
3073 |
{ |
{ |
|
case OP_CHAR: |
|
|
must_stop = 0; |
|
|
cc++; |
|
|
break; |
|
|
|
|
3074 |
case OP_CHARI: |
case OP_CHARI: |
3075 |
caseless = 1; |
caseless = 1; |
3076 |
must_stop = 0; |
case OP_CHAR: |
3077 |
|
last = FALSE; |
3078 |
cc++; |
cc++; |
3079 |
break; |
break; |
3080 |
|
|
3099 |
cc++; |
cc++; |
3100 |
break; |
break; |
3101 |
|
|
3102 |
|
case OP_EXACTI: |
3103 |
|
caseless = 1; |
3104 |
case OP_EXACT: |
case OP_EXACT: |
3105 |
|
repeat = GET2(cc, 1); |
3106 |
|
last = FALSE; |
3107 |
cc += 1 + IMM2_SIZE; |
cc += 1 + IMM2_SIZE; |
3108 |
break; |
break; |
3109 |
|
|
3114 |
cc++; |
cc++; |
3115 |
break; |
break; |
3116 |
|
|
3117 |
case OP_EXACTI: |
case OP_KET: |
3118 |
caseless = 1; |
cc += 1 + LINK_SIZE; |
3119 |
cc += 1 + IMM2_SIZE; |
continue; |
3120 |
|
|
3121 |
|
case OP_ALT: |
3122 |
|
cc += GET(cc, 1); |
3123 |
|
continue; |
3124 |
|
|
3125 |
|
case OP_ONCE: |
3126 |
|
case OP_ONCE_NC: |
3127 |
|
case OP_BRA: |
3128 |
|
case OP_BRAPOS: |
3129 |
|
case OP_CBRA: |
3130 |
|
case OP_CBRAPOS: |
3131 |
|
alternative = cc + GET(cc, 1); |
3132 |
|
while (*alternative == OP_ALT) |
3133 |
|
{ |
3134 |
|
max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars); |
3135 |
|
if (max_chars == 0) |
3136 |
|
return consumed; |
3137 |
|
alternative += GET(alternative, 1); |
3138 |
|
} |
3139 |
|
|
3140 |
|
if (*cc == OP_CBRA || *cc == OP_CBRAPOS) |
3141 |
|
cc += IMM2_SIZE; |
3142 |
|
cc += 1 + LINK_SIZE; |
3143 |
|
continue; |
3144 |
|
|
3145 |
|
case OP_CLASS: |
3146 |
|
case OP_NCLASS: |
3147 |
|
any = TRUE; |
3148 |
|
cc += 1 + 32 / sizeof(pcre_uchar); |
3149 |
break; |
break; |
3150 |
|
|
3151 |
default: |
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 |
3152 |
must_stop = 2; |
case OP_XCLASS: |
3153 |
|
any = TRUE; |
3154 |
|
cc += GET(cc, 1); |
3155 |
break; |
break; |
3156 |
|
#endif |
3157 |
|
|
3158 |
|
case OP_NOT_DIGIT: |
3159 |
|
case OP_DIGIT: |
3160 |
|
case OP_NOT_WHITESPACE: |
3161 |
|
case OP_WHITESPACE: |
3162 |
|
case OP_NOT_WORDCHAR: |
3163 |
|
case OP_WORDCHAR: |
3164 |
|
case OP_ANY: |
3165 |
|
case OP_ALLANY: |
3166 |
|
any = TRUE; |
3167 |
|
cc++; |
3168 |
|
break; |
3169 |
|
|
3170 |
|
#ifdef SUPPORT_UCP |
3171 |
|
case OP_NOTPROP: |
3172 |
|
case OP_PROP: |
3173 |
|
any = TRUE; |
3174 |
|
cc += 1 + 2; |
3175 |
|
break; |
3176 |
|
#endif |
3177 |
|
|
3178 |
|
case OP_TYPEEXACT: |
3179 |
|
repeat = GET2(cc, 1); |
3180 |
|
cc += 1 + IMM2_SIZE; |
3181 |
|
continue; |
3182 |
|
|
3183 |
|
default: |
3184 |
|
return consumed; |
3185 |
} |
} |
3186 |
|
|
3187 |
if (must_stop == 2) |
if (any) |
3188 |
break; |
{ |
3189 |
|
#ifdef SUPPORT_UTF |
3190 |
|
if (common->utf) return consumed; |
3191 |
|
#endif |
3192 |
|
#if defined COMPILE_PCRE8 |
3193 |
|
mask = 0xff; |
3194 |
|
#elif defined COMPILE_PCRE16 |
3195 |
|
mask = 0xffff; |
3196 |
|
#elif defined COMPILE_PCRE32 |
3197 |
|
mask = 0xffffffff; |
3198 |
|
#else |
3199 |
|
SLJIT_ASSERT_STOP(); |
3200 |
|
#endif |
3201 |
|
|
3202 |
|
do |
3203 |
|
{ |
3204 |
|
chars[0] = mask; |
3205 |
|
chars[1] = mask; |
3206 |
|
|
3207 |
|
if (--max_chars == 0) |
3208 |
|
return consumed; |
3209 |
|
consumed++; |
3210 |
|
chars += 2; |
3211 |
|
} |
3212 |
|
while (--repeat > 0); |
3213 |
|
|
3214 |
|
repeat = 1; |
3215 |
|
continue; |
3216 |
|
} |
3217 |
|
|
3218 |
len = 1; |
len = 1; |
3219 |
#ifdef SUPPORT_UTF |
#ifdef SUPPORT_UTF |
3220 |
if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]); |
if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc); |
3221 |
#endif |
#endif |
3222 |
|
|
3223 |
if (caseless && char_has_othercase(common, cc)) |
if (caseless != 0 && char_has_othercase(common, cc)) |
3224 |
{ |
{ |
3225 |
caseless = char_get_othercase_bit(common, cc); |
caseless = char_get_othercase_bit(common, cc); |
3226 |
if (caseless == 0) |
if (caseless == 0) |
3227 |
return FALSE; |
return consumed; |
3228 |
#ifdef COMPILE_PCRE8 |
#ifdef COMPILE_PCRE8 |
3229 |
caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8)); |
caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8)); |
3230 |
#else |
#else |
3237 |
else |
else |
3238 |
caseless = 0; |
caseless = 0; |
3239 |
|
|
3240 |
while (len > 0 && location < MAX_N_CHARS * 2) |
len_save = len; |
3241 |
{ |
cc_save = cc; |
3242 |
c = *cc; |
while (TRUE) |
3243 |
bit = 0; |
{ |
3244 |
if (len == (caseless & 0xff)) |
do |
3245 |
{ |
{ |
3246 |
bit = caseless >> 8; |
chr = *cc; |
3247 |
c |= bit; |
#ifdef COMPILE_PCRE32 |
3248 |
|
if (SLJIT_UNLIKELY(chr == NOTACHAR)) |
3249 |
|
return consumed; |
3250 |
|
#endif |
3251 |
|
mask = 0; |
3252 |
|
if (len == (caseless & 0xff)) |
3253 |
|
{ |
3254 |
|
mask = caseless >> 8; |
3255 |
|
chr |= mask; |
3256 |
|
} |
3257 |
|
|
3258 |
|
if (chars[0] == NOTACHAR) |
3259 |
|
{ |
3260 |
|
chars[0] = chr; |
3261 |
|
chars[1] = mask; |
3262 |
|
} |
3263 |
|
else |
3264 |
|
{ |
3265 |
|
mask |= chars[0] ^ chr; |
3266 |
|
chr |= mask; |
3267 |
|
chars[0] = chr; |
3268 |
|
chars[1] |= mask; |
3269 |
|
} |
3270 |
|
|
3271 |
|
len--; |
3272 |
|
if (--max_chars == 0) |
3273 |
|
return consumed; |
3274 |
|
consumed++; |
3275 |
|
chars += 2; |
3276 |
|
cc++; |
3277 |
} |
} |
3278 |
|
while (len > 0); |
3279 |
|
|
3280 |
chars[location] = c; |
if (--repeat == 0) |
3281 |
chars[location + 1] = bit; |
break; |
3282 |
|
|
3283 |
len--; |
len = len_save; |
3284 |
location += 2; |
cc = cc_save; |
|
cc++; |
|
3285 |
} |
} |
3286 |
|
|
3287 |
if (location >= MAX_N_CHARS * 2 || must_stop != 0) |
repeat = 1; |
3288 |
|
if (last) |
3289 |
|
return consumed; |
3290 |
|
} |
3291 |
|
} |
3292 |
|
|
3293 |
|
#define MAX_N_CHARS 16 |
3294 |
|
|
3295 |
|
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline) |
3296 |
|
{ |
3297 |
|
DEFINE_COMPILER; |
3298 |
|
struct sljit_label *start; |
3299 |
|
struct sljit_jump *quit; |
3300 |
|
pcre_uint32 chars[MAX_N_CHARS * 2]; |
3301 |
|
pcre_uint8 ones[MAX_N_CHARS]; |
3302 |
|
pcre_uint32 mask; |
3303 |
|
int i, max; |
3304 |
|
int offsets[3]; |
3305 |
|
|
3306 |
|
for (i = 0; i < MAX_N_CHARS; i++) |
3307 |
|
{ |
3308 |
|
chars[i << 1] = NOTACHAR; |
3309 |
|
chars[(i << 1) + 1] = 0; |
3310 |
|
} |
3311 |
|
|
3312 |
|
max = scan_prefix(common, common->start, chars, MAX_N_CHARS); |
3313 |
|
|
3314 |
|
if (max <= 1) |
3315 |
|
return FALSE; |
3316 |
|
|
3317 |
|
for (i = 0; i < max; i++) |
3318 |
|
{ |
3319 |
|
mask = chars[(i << 1) + 1]; |
3320 |
|
ones[i] = ones_in_half_byte[mask & 0xf]; |
3321 |
|
mask >>= 4; |
3322 |
|
while (mask != 0) |
3323 |
|
{ |
3324 |
|
ones[i] += ones_in_half_byte[mask & 0xf]; |
3325 |
|
mask >>= 4; |
3326 |
|
} |
3327 |
|
} |
3328 |
|
|
3329 |
|
offsets[0] = -1; |
3330 |
|
/* Scan forward. */ |
3331 |
|
for (i = 0; i < max; i++) |
3332 |
|
if (ones[i] <= 2) { |
3333 |
|
offsets[0] = i; |
3334 |
break; |
break; |
3335 |
} |
} |
3336 |
|
|
3337 |
/* At least two characters are required. */ |
if (offsets[0] == -1) |
3338 |
if (location < 2 * 2) |
return FALSE; |
3339 |
return FALSE; |
|
3340 |
|
/* Scan backward. */ |
3341 |
|
offsets[1] = -1; |
3342 |
|
for (i = max - 1; i > offsets[0]; i--) |
3343 |
|
if (ones[i] <= 2) { |
3344 |
|
offsets[1] = i; |
3345 |
|
break; |
3346 |
|
} |
3347 |
|
|
3348 |
|
offsets[2] = -1; |
3349 |
|
if (offsets[1] >= 0) |
3350 |
|
{ |
3351 |
|
/* Scan from middle. */ |
3352 |
|
for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++) |
3353 |
|
if (ones[i] <= 2) |
3354 |
|
{ |
3355 |
|
offsets[2] = i; |
3356 |
|
break; |
3357 |
|
} |
3358 |
|
|
3359 |
|
if (offsets[2] == -1) |
3360 |
|
{ |
3361 |
|
for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--) |
3362 |
|
if (ones[i] <= 2) |
3363 |
|
{ |
3364 |
|
offsets[2] = i; |
3365 |
|
break; |
3366 |
|
} |
3367 |
|
} |
3368 |
|
} |
3369 |
|
|
3370 |
|
SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1])); |
3371 |
|
SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2])); |
3372 |
|
|
3373 |
|
chars[0] = chars[offsets[0] << 1]; |
3374 |
|
chars[1] = chars[(offsets[0] << 1) + 1]; |
3375 |
|
if (offsets[2] >= 0) |
3376 |
|
{ |
3377 |
|
chars[2] = chars[offsets[2] << 1]; |
3378 |
|
chars[3] = chars[(offsets[2] << 1) + 1]; |
3379 |
|
} |
3380 |
|
if (offsets[1] >= 0) |
3381 |
|
{ |
3382 |
|
chars[4] = chars[offsets[1] << 1]; |
3383 |
|
chars[5] = chars[(offsets[1] << 1) + 1]; |
3384 |
|
} |
3385 |
|
|
3386 |
|
max -= 1; |
3387 |
if (firstline) |
if (firstline) |
3388 |
{ |
{ |
3389 |
SLJIT_ASSERT(common->first_line_end != 0); |
SLJIT_ASSERT(common->first_line_end != 0); |
3390 |
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); |
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); |
3391 |
OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1)); |
OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS(max)); |
3392 |
} |
} |
3393 |
else |
else |
3394 |
OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1)); |
OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); |
3395 |
|
|
3396 |
start = LABEL(); |
start = LABEL(); |
3397 |
quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
3398 |
|
|
3399 |
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0])); |
3400 |
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); |
if (offsets[1] >= 0) |
3401 |
|
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1])); |
3402 |
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
3403 |
|
|
3404 |
if (chars[1] != 0) |
if (chars[1] != 0) |
3405 |
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]); |
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]); |
3406 |
CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start); |
CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start); |
3407 |
if (location > 2 * 2) |
if (offsets[2] >= 0) |
3408 |
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); |
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1)); |
3409 |
if (chars[3] != 0) |
|
3410 |
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]); |
if (offsets[1] >= 0) |
|
CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start); |
|
|
if (location > 2 * 2) |
|
3411 |
{ |
{ |
3412 |
if (chars[5] != 0) |
if (chars[5] != 0) |
3413 |
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]); |
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]); |
3414 |
CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start); |
CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start); |
3415 |
|
} |
3416 |
|
|
3417 |
|
if (offsets[2] >= 0) |
3418 |
|
{ |
3419 |
|
if (chars[3] != 0) |
3420 |
|
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]); |
3421 |
|
CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start); |
3422 |
} |
} |
3423 |
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
3424 |
|
|
3427 |
if (firstline) |
if (firstline) |
3428 |
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); |
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); |
3429 |
else |
else |
3430 |
OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1)); |
OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); |
3431 |
return TRUE; |
return TRUE; |
3432 |
} |
} |
3433 |
|
|
3576 |
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); |
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); |
3577 |
} |
} |
3578 |
|
|
3579 |
static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks); |
static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks); |
3580 |
|
|
3581 |
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline) |
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline) |
3582 |
{ |
{ |
3583 |
DEFINE_COMPILER; |
DEFINE_COMPILER; |
3584 |
struct sljit_label *start; |
struct sljit_label *start; |
3585 |
struct sljit_jump *quit; |
struct sljit_jump *quit; |
3586 |
struct sljit_jump *found = NULL; |
struct sljit_jump *found = NULL; |
3587 |
jump_list *matches = NULL; |
jump_list *matches = NULL; |
|
pcre_uint8 inverted_start_bits[32]; |
|
|
int i; |
|
3588 |
#ifndef COMPILE_PCRE8 |
#ifndef COMPILE_PCRE8 |
3589 |
struct sljit_jump *jump; |
struct sljit_jump *jump; |
3590 |
#endif |
#endif |
3591 |
|
|
|
for (i = 0; i < 32; ++i) |
|
|
inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]); |
|
|
|
|
3592 |
if (firstline) |
if (firstline) |
3593 |
{ |
{ |
3594 |
SLJIT_ASSERT(common->first_line_end != 0); |
SLJIT_ASSERT(common->first_line_end != 0); |
3604 |
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); |
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); |
3605 |
#endif |
#endif |
3606 |
|
|
3607 |
if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches)) |
if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches)) |
3608 |
{ |
{ |
3609 |
#ifndef COMPILE_PCRE8 |
#ifndef COMPILE_PCRE8 |
3610 |
jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255); |
jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255); |
3613 |
#endif |
#endif |
3614 |
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); |
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); |
3615 |
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); |
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); |
3616 |
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits); |
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits); |
3617 |
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); |
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); |
3618 |
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); |
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); |
3619 |
found = JUMP(SLJIT_C_NOT_ZERO); |
found = JUMP(SLJIT_C_NOT_ZERO); |
3862 |
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); |
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); |
3863 |
} |
} |
3864 |
|
|
3865 |
/* |
static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks) |
|
range format: |
|
|
|
|
|
ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range). |
|
|
ranges[1] = first bit (0 or 1) |
|
|
ranges[2-length] = position of the bit change (when the current bit is not equal to the previous) |
|
|
*/ |
|
|
|
|
|
static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch) |
|
3866 |
{ |
{ |
3867 |
DEFINE_COMPILER; |
DEFINE_COMPILER; |
3868 |
struct sljit_jump *jump; |
int ranges[MAX_RANGE_SIZE]; |
|
|
|
|
if (ranges[0] < 0) |
|
|
return FALSE; |
|
|
|
|
|
switch(ranges[0]) |
|
|
{ |
|
|
case 1: |
|
|
if (readch) |
|
|
read_char(common); |
|
|
add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2])); |
|
|
return TRUE; |
|
|
|
|
|
case 2: |
|
|
if (readch) |
|
|
read_char(common); |
|
|
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]); |
|
|
add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2])); |
|
|
return TRUE; |
|
|
|
|
|
case 4: |
|
|
if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5]) |
|
|
{ |
|
|
if (readch) |
|
|
read_char(common); |
|
|
if (ranges[1] != 0) |
|
|
{ |
|
|
add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2])); |
|
|
add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4])); |
|
|
} |
|
|
else |
|
|
{ |
|
|
jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]); |
|
|
add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4])); |
|
|
JUMPHERE(jump); |
|
|
} |
|
|
return TRUE; |
|
|
} |
|
|
if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2])) |
|
|
{ |
|
|
if (readch) |
|
|
read_char(common); |
|
|
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]); |
|
|
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]); |
|
|
add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4])); |
|
|
return TRUE; |
|
|
} |
|
|
return FALSE; |
|
|
|
|
|
default: |
|
|
return FALSE; |
|
|
} |
|
|
} |
|
|
|
|
|
static void get_ctype_ranges(compiler_common *common, int flag, int *ranges) |
|
|
{ |
|
|
int i, bit, length; |
|
|
const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes; |
|
|
|
|
|
bit = ctypes[0] & flag; |
|
|
ranges[0] = -1; |
|
|
ranges[1] = bit != 0 ? 1 : 0; |
|
|
length = 0; |
|
|
|
|
|
for (i = 1; i < 256; i++) |
|
|
if ((ctypes[i] & flag) != bit) |
|
|
{ |
|
|
if (length >= MAX_RANGE_SIZE) |
|
|
return; |
|
|
ranges[2 + length] = i; |
|
|
length++; |
|
|
bit ^= flag; |
|
|
} |
|
|
|
|
|
if (bit != 0) |
|
|
{ |
|
|
if (length >= MAX_RANGE_SIZE) |
|
|
return; |
|
|
ranges[2 + length] = 256; |
|
|
length++; |
|
|
} |
|
|
ranges[0] = length; |
|
|
} |
|
|
|
|
|
static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks) |
|
|
{ |
|
|
int ranges[2 + MAX_RANGE_SIZE]; |
|
3869 |
pcre_uint8 bit, cbit, all; |
pcre_uint8 bit, cbit, all; |
3870 |
int i, byte, length = 0; |
int i, byte, length = 0; |
3871 |
|
|
3872 |
bit = bits[0] & 0x1; |
bit = bits[0] & 0x1; |
3873 |
ranges[1] = bit; |
/* All bits will be zero or one (since bit is zero or one). */ |
|
/* Can be 0 or 255. */ |
|
3874 |
all = -bit; |
all = -bit; |
3875 |
|
|
3876 |
for (i = 0; i < 256; ) |
for (i = 0; i < 256; ) |
3885 |
{ |
{ |
3886 |
if (length >= MAX_RANGE_SIZE) |
if (length >= MAX_RANGE_SIZE) |
3887 |
return FALSE; |
return FALSE; |
3888 |
ranges[2 + length] = i; |
ranges[length] = i; |
3889 |
length++; |
length++; |
3890 |
bit = cbit; |
bit = cbit; |
3891 |
all = -cbit; |
all = -cbit; |
3898 |
{ |
{ |
3899 |
if (length >= MAX_RANGE_SIZE) |
if (length >= MAX_RANGE_SIZE) |
3900 |
return FALSE; |
return FALSE; |
3901 |
ranges[2 + length] = 256; |
ranges[length] = 256; |
3902 |
length++; |
length++; |
3903 |
} |
} |
|
ranges[0] = length; |
|
3904 |
|
|
3905 |
return check_ranges(common, ranges, backtracks, FALSE); |
if (length < 0 || length > 4) |
3906 |
|
return FALSE; |
3907 |
|
|
3908 |
|
bit = bits[0] & 0x1; |
3909 |
|
if (invert) bit ^= 0x1; |
3910 |
|
|
3911 |
|
/* No character is accepted. */ |
3912 |
|
if (length == 0 && bit == 0) |
3913 |
|
add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); |
3914 |
|
|
3915 |
|
switch(length) |
3916 |
|
{ |
3917 |
|
case 0: |
3918 |
|
/* When bit != 0, all characters are accepted. */ |
3919 |
|
return TRUE; |
3920 |
|
|
3921 |
|
case 1: |
3922 |
|
add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0])); |
3923 |
|
return TRUE; |
3924 |
|
|
3925 |
|
case 2: |
3926 |
|
if (ranges[0] + 1 != ranges[1]) |
3927 |
|
{ |
3928 |
|
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]); |
3929 |
|
add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0])); |
3930 |
|
} |
3931 |
|
else |
3932 |
|
add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0])); |
3933 |
|
return TRUE; |
3934 |
|
|
3935 |
|
case 3: |
3936 |
|
if (bit != 0) |
3937 |
|
{ |
3938 |
|
add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2])); |
3939 |
|
if (ranges[0] + 1 != ranges[1]) |
3940 |
|
{ |
3941 |
|
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]); |
3942 |
|
add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0])); |
3943 |
|
} |
3944 |
|
else |
3945 |
|
add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0])); |
3946 |
|
return TRUE; |
3947 |
|
} |
3948 |
|
|
3949 |
|
add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[0])); |
3950 |
|
if (ranges[1] + 1 != ranges[2]) |
3951 |
|
{ |
3952 |
|
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]); |
3953 |
|
add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1])); |
3954 |
|
} |
3955 |
|
else |
3956 |
|
add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1])); |
3957 |
|
return TRUE; |
3958 |
|
|
3959 |
|
case 4: |
3960 |
|
if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2]) |
3961 |
|
&& (ranges[0] | (ranges[2] - ranges[0])) == ranges[2] |
3962 |
|
&& is_powerof2(ranges[2] - ranges[0])) |
3963 |
|
{ |
3964 |
|
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]); |
3965 |
|
if (ranges[2] + 1 != ranges[3]) |
3966 |
|
{ |
3967 |
|
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]); |
3968 |
|
add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2])); |
3969 |
|
} |
3970 |
|
else |
3971 |
|
add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2])); |
3972 |
|
return TRUE; |
3973 |
|
} |
3974 |
|
|
3975 |
|
if (bit != 0) |
3976 |
|
{ |
3977 |
|
i = 0; |
3978 |
|
if (ranges[0] + 1 != ranges[1]) |
3979 |
|
{ |
3980 |
|
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]); |
3981 |
|
add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0])); |
3982 |
|
i = ranges[0]; |
3983 |
|
} |
3984 |
|
else |
3985 |
|
add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0])); |
3986 |
|
|
3987 |
|
if (ranges[2] + 1 != ranges[3]) |
3988 |
|
{ |
3989 |
|
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i); |
3990 |
|
add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2])); |
3991 |
|
} |
3992 |
|
else |
3993 |
|
add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i)); |
3994 |
|
return TRUE; |
3995 |
|
} |
3996 |
|
|
3997 |
|
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]); |
3998 |
|
add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0])); |
3999 |
|
if (ranges[1] + 1 != ranges[2]) |
4000 |
|
{ |
4001 |
|
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]); |
4002 |
|
add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1])); |
4003 |
|
} |
4004 |
|
else |
4005 |
|
add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0])); |
4006 |
|
return TRUE; |
4007 |
|
|
4008 |
|
default: |
4009 |
|
SLJIT_ASSERT_STOP(); |
4010 |
|
return FALSE; |
4011 |
|
} |
4012 |
} |
} |
4013 |
|
|
4014 |
static void check_anynewline(compiler_common *common) |
static void check_anynewline(compiler_common *common) |
4396 |
{ |
{ |
4397 |
DEFINE_COMPILER; |
DEFINE_COMPILER; |
4398 |
jump_list *found = NULL; |
jump_list *found = NULL; |
4399 |
jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks; |
jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks; |
4400 |
pcre_int32 c, charoffset; |
pcre_int32 c, charoffset; |
|
const pcre_uint32 *other_cases; |
|
4401 |
struct sljit_jump *jump = NULL; |
struct sljit_jump *jump = NULL; |
4402 |
pcre_uchar *ccbegin; |
pcre_uchar *ccbegin; |
4403 |
int compares, invertcmp, numberofcmps; |
int compares, invertcmp, numberofcmps; |
4404 |
|
|
4405 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
4406 |
BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE; |
BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE; |
4407 |
BOOL charsaved = FALSE; |
BOOL charsaved = FALSE; |
4408 |
int typereg = TMP1, scriptreg = TMP1; |
int typereg = TMP1, scriptreg = TMP1; |
4409 |
|
const pcre_uint32 *other_cases; |
4410 |
pcre_int32 typeoffset; |
pcre_int32 typeoffset; |
4411 |
#endif |
#endif |
4412 |
|
|
4415 |
detect_partial_match(common, backtracks); |
detect_partial_match(common, backtracks); |
4416 |
read_char(common); |
read_char(common); |
4417 |
|
|
4418 |
if ((*cc++ & XCL_MAP) != 0) |
cc++; |
4419 |
|
if ((cc[-1] & XCL_HASPROP) == 0) |
4420 |
{ |
{ |
4421 |
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); |
if ((cc[-1] & XCL_MAP) != 0) |
4422 |
#ifndef COMPILE_PCRE8 |
{ |
4423 |
jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); |
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); |
4424 |
#elif defined SUPPORT_UTF |
#ifdef SUPPORT_UCP |
4425 |
if (common->utf) |
charsaved = TRUE; |
|
jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); |
|
4426 |
#endif |
#endif |
4427 |
|
if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, FALSE, backtracks)) |
4428 |
|
{ |
4429 |
|
jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); |
4430 |
|
|
4431 |
|
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); |
4432 |
|
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); |
4433 |
|
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); |
4434 |
|
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); |
4435 |
|
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); |
4436 |
|
add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO)); |
4437 |
|
add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); |
4438 |
|
|
4439 |
if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list)) |
JUMPHERE(jump); |
4440 |
|
} |
4441 |
|
else |
4442 |
|
add_jump(compiler, &found, CMP(SLJIT_C_LESS_EQUAL, TMP3, 0, SLJIT_IMM, 0xff)); |
4443 |
|
|
4444 |
|
OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); |
4445 |
|
cc += 32 / sizeof(pcre_uchar); |
4446 |
|
} |
4447 |
|
else |
4448 |
|
add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff)); |
4449 |
|
} |
4450 |
|
else if ((cc[-1] & XCL_MAP) != 0) |
4451 |
|
{ |
4452 |
|
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); |
4453 |
|
#ifdef SUPPORT_UCP |
4454 |
|
charsaved = TRUE; |
4455 |
|
#endif |
4456 |
|
if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list)) |
4457 |
{ |
{ |
4458 |
|
#ifdef COMPILE_PCRE8 |
4459 |
|
SLJIT_ASSERT(common->utf); |
4460 |
|
#endif |
4461 |
|
jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); |
4462 |
|
|
4463 |
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); |
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); |
4464 |
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); |
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); |
4465 |
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); |
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); |
4466 |
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); |
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); |
4467 |
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); |
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); |
4468 |
add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO)); |
add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO)); |
|
} |
|
4469 |
|
|
|
#ifndef COMPILE_PCRE8 |
|
|
JUMPHERE(jump); |
|
|
#elif defined SUPPORT_UTF |
|
|
if (common->utf) |
|
4470 |
JUMPHERE(jump); |
JUMPHERE(jump); |
4471 |
#endif |
} |
4472 |
|
|
4473 |
OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); |
OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); |
|
#ifdef SUPPORT_UCP |
|
|
charsaved = TRUE; |
|
|
#endif |
|
4474 |
cc += 32 / sizeof(pcre_uchar); |
cc += 32 / sizeof(pcre_uchar); |
4475 |
} |
} |
4476 |
|
|
4528 |
case PT_SPACE: |
case PT_SPACE: |
4529 |
case PT_PXSPACE: |
case PT_PXSPACE: |
4530 |
case PT_WORD: |
case PT_WORD: |
4531 |
|
case PT_PXGRAPH: |
4532 |
|
case PT_PXPRINT: |
4533 |
|
case PT_PXPUNCT: |
4534 |
needstype = TRUE; |
needstype = TRUE; |
4535 |
needschar = TRUE; |
needschar = TRUE; |
4536 |
break; |
break; |
4718 |
|
|
4719 |
case PT_SPACE: |
case PT_SPACE: |
4720 |
case PT_PXSPACE: |
case PT_PXSPACE: |
|
if (*cc == PT_SPACE) |
|
|
{ |
|
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); |
|
|
jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset); |
|
|
} |
|
4721 |
SET_CHAR_OFFSET(9); |
SET_CHAR_OFFSET(9); |
4722 |
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9); |
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9); |
4723 |
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); |
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); |
4724 |
if (*cc == PT_SPACE) |
|
4725 |
JUMPHERE(jump); |
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9); |
4726 |
|
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); |
4727 |
|
|
4728 |
|
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9); |
4729 |
|
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); |
4730 |
|
|
4731 |
SET_TYPE_OFFSET(ucp_Zl); |
SET_TYPE_OFFSET(ucp_Zl); |
4732 |
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl); |
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl); |
4818 |
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL); |
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL); |
4819 |
jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); |
jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); |
4820 |
break; |
break; |
4821 |
|
|
4822 |
|
case PT_PXGRAPH: |
4823 |
|
/* C and Z groups are the farthest two groups. */ |
4824 |
|
SET_TYPE_OFFSET(ucp_Ll); |
4825 |
|
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); |
4826 |
|
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER); |
4827 |
|
|
4828 |
|
jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); |
4829 |
|
|
4830 |
|
/* In case of ucp_Cf, we overwrite the result. */ |
4831 |
|
SET_CHAR_OFFSET(0x2066); |
4832 |
|
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); |
4833 |
|
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); |
4834 |
|
|
4835 |
|
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); |
4836 |
|
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); |
4837 |
|
|
4838 |
|
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066); |
4839 |
|
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); |
4840 |
|
|
4841 |
|
JUMPHERE(jump); |
4842 |
|
jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); |
4843 |
|
break; |
4844 |
|
|
4845 |
|
case PT_PXPRINT: |
4846 |
|
/* C and Z groups are the farthest two groups. */ |
4847 |
|
SET_TYPE_OFFSET(ucp_Ll); |
4848 |
|
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); |
4849 |
|
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER); |
4850 |
|
|
4851 |
|
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll); |
4852 |
|
OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL); |
4853 |
|
|
4854 |
|
jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); |
4855 |
|
|
4856 |
|
/* In case of ucp_Cf, we overwrite the result. */ |
4857 |
|
SET_CHAR_OFFSET(0x2066); |
4858 |
|
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); |
4859 |
|
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); |
4860 |
|
|
4861 |
|
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); |
4862 |
|
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); |
4863 |
|
|
4864 |
|
JUMPHERE(jump); |
4865 |
|
jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); |
4866 |
|
break; |
4867 |
|
|
4868 |
|
case PT_PXPUNCT: |
4869 |
|
SET_TYPE_OFFSET(ucp_Sc); |
4870 |
|
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc); |
4871 |
|
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); |
4872 |
|
|
4873 |
|
SET_CHAR_OFFSET(0); |
4874 |
|
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff); |
4875 |
|
OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); |
4876 |
|
|
4877 |
|
SET_TYPE_OFFSET(ucp_Pc); |
4878 |
|
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc); |
4879 |
|
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); |
4880 |
|
jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); |
4881 |
|
break; |
4882 |
} |
} |
4883 |
cc += 2; |
cc += 2; |
4884 |
} |
} |
4910 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
4911 |
pcre_uchar propdata[5]; |
pcre_uchar propdata[5]; |
4912 |
#endif |
#endif |
4913 |
#endif |
#endif /* SUPPORT_UTF */ |
4914 |
|
|
4915 |
switch(type) |
switch(type) |
4916 |
{ |
{ |
4935 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
4936 |
case OP_DIGIT: |
case OP_DIGIT: |
4937 |
/* Digits are usually 0-9, so it is worth to optimize them. */ |
/* Digits are usually 0-9, so it is worth to optimize them. */ |
|
if (common->digits[0] == -2) |
|
|
get_ctype_ranges(common, ctype_digit, common->digits); |
|
4938 |
detect_partial_match(common, backtracks); |
detect_partial_match(common, backtracks); |
4939 |
/* Flip the starting bit in the negative case. */ |
#if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
4940 |
if (type == OP_NOT_DIGIT) |
if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE)) |
4941 |
common->digits[1] ^= 1; |
read_char7_type(common, type == OP_NOT_DIGIT); |
4942 |
if (!check_ranges(common, common->digits, backtracks, TRUE)) |
else |
4943 |
{ |
#endif |
4944 |
read_char8_type(common); |
read_char8_type(common, type == OP_NOT_DIGIT); |
4945 |
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit); |
/* Flip the starting bit in the negative case. */ |
4946 |
add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); |
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit); |
4947 |
} |
add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); |
|
if (type == OP_NOT_DIGIT) |
|
|
common->digits[1] ^= 1; |
|
4948 |
return cc; |
return cc; |
4949 |
|
|
4950 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
4951 |
case OP_WHITESPACE: |
case OP_WHITESPACE: |
4952 |
detect_partial_match(common, backtracks); |
detect_partial_match(common, backtracks); |
4953 |
read_char8_type(common); |
#if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
4954 |
|
if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE)) |
4955 |
|
read_char7_type(common, type == OP_NOT_WHITESPACE); |
4956 |
|
else |
4957 |
|
#endif |
4958 |
|
read_char8_type(common, type == OP_NOT_WHITESPACE); |
4959 |
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space); |
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space); |
4960 |
add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); |
add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); |
4961 |
return cc; |
return cc; |
4963 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
4964 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
4965 |
detect_partial_match(common, backtracks); |
detect_partial_match(common, backtracks); |
4966 |
read_char8_type(common); |
#if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
4967 |
|
if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE)) |
4968 |
|
read_char7_type(common, type == OP_NOT_WORDCHAR); |
4969 |
|
else |
4970 |
|
#endif |
4971 |
|
read_char8_type(common, type == OP_NOT_WORDCHAR); |
4972 |
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word); |
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word); |
4973 |
add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); |
add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); |
4974 |
return cc; |
return cc; |
5031 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
5032 |
case OP_NOTPROP: |
case OP_NOTPROP: |
5033 |
case OP_PROP: |
case OP_PROP: |
5034 |
propdata[0] = 0; |
propdata[0] = XCL_HASPROP; |
5035 |
propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP; |
propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP; |
5036 |
propdata[2] = cc[0]; |
propdata[2] = cc[0]; |
5037 |
propdata[3] = cc[1]; |
propdata[3] = cc[1]; |
5389 |
case OP_CLASS: |
case OP_CLASS: |
5390 |
case OP_NCLASS: |
case OP_NCLASS: |
5391 |
detect_partial_match(common, backtracks); |
detect_partial_match(common, backtracks); |
5392 |
read_char(common); |
|
5393 |
if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks)) |
#if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
5394 |
|
bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255; |
5395 |
|
read_char_max(common, bit, type == OP_NCLASS); |
5396 |
|
#else |
5397 |
|
read_char_max(common, 255, type == OP_NCLASS); |
5398 |
|
#endif |
5399 |
|
|
5400 |
|
if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks)) |
5401 |
return cc + 32 / sizeof(pcre_uchar); |
return cc + 32 / sizeof(pcre_uchar); |
5402 |
|
|
5403 |
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 |
#if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
5404 |
jump[0] = NULL; |
jump[0] = NULL; |
|
#ifdef COMPILE_PCRE8 |
|
|
/* This check only affects 8 bit mode. In other modes, we |
|
|
always need to compare the value with 255. */ |
|
5405 |
if (common->utf) |
if (common->utf) |
|
#endif /* COMPILE_PCRE8 */ |
|
5406 |
{ |
{ |
5407 |
jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); |
jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, bit); |
5408 |
if (type == OP_CLASS) |
if (type == OP_CLASS) |
5409 |
{ |
{ |
5410 |
add_jump(compiler, backtracks, jump[0]); |
add_jump(compiler, backtracks, jump[0]); |
5411 |
jump[0] = NULL; |
jump[0] = NULL; |
5412 |
} |
} |
5413 |
} |
} |
5414 |
#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */ |
#elif !defined COMPILE_PCRE8 |
5415 |
|
jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); |
5416 |
|
if (type == OP_CLASS) |
5417 |
|
{ |
5418 |
|
add_jump(compiler, backtracks, jump[0]); |
5419 |
|
jump[0] = NULL; |
5420 |
|
} |
5421 |
|
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */ |
5422 |
|
|
5423 |
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); |
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); |
5424 |
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); |
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); |
5425 |
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); |
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); |
5426 |
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); |
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); |
5427 |
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); |
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); |
5428 |
add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO)); |
add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO)); |
5429 |
|
|
5430 |
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 |
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 |
5431 |
if (jump[0] != NULL) |
if (jump[0] != NULL) |
5432 |
JUMPHERE(jump[0]); |
JUMPHERE(jump[0]); |
5433 |
#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */ |
#endif |
5434 |
|
|
5435 |
return cc + 32 / sizeof(pcre_uchar); |
return cc + 32 / sizeof(pcre_uchar); |
5436 |
|
|
5437 |
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
5536 |
return compile_char1_matchingpath(common, *cc, cc + 1, backtracks); |
return compile_char1_matchingpath(common, *cc, cc + 1, backtracks); |
5537 |
} |
} |
5538 |
|
|
|
static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks) |
|
|
{ |
|
|
DEFINE_COMPILER; |
|
|
int offset = GET2(cc, 1) << 1; |
|
|
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); |
|
|
if (!common->jscript_compat) |
|
|
{ |
|
|
if (backtracks == NULL) |
|
|
{ |
|
|
/* OVECTOR(1) contains the "string begin - 1" constant. */ |
|
|
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); |
|
|
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); |
|
|
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); |
|
|
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); |
|
|
return JUMP(SLJIT_C_NOT_ZERO); |
|
|
} |
|
|
add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); |
|
|
} |
|
|
return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); |
|
|
} |
|
|
|
|
5539 |
/* Forward definitions. */ |
/* Forward definitions. */ |
5540 |
static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *); |
static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *); |
5541 |
static void compile_backtrackingpath(compiler_common *, struct backtrack_common *); |
static void compile_backtrackingpath(compiler_common *, struct backtrack_common *); |
5568 |
|
|
5569 |
#define BACKTRACK_AS(type) ((type *)backtrack) |
#define BACKTRACK_AS(type) ((type *)backtrack) |
5570 |
|
|
5571 |
static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail) |
static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks) |
5572 |
|
{ |
5573 |
|
/* The OVECTOR offset goes to TMP2. */ |
5574 |
|
DEFINE_COMPILER; |
5575 |
|
int count = GET2(cc, 1 + IMM2_SIZE); |
5576 |
|
pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size; |
5577 |
|
unsigned int offset; |
5578 |
|
jump_list *found = NULL; |
5579 |
|
|
5580 |
|
SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI); |
5581 |
|
|
5582 |
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); |
5583 |
|
|
5584 |
|
count--; |
5585 |
|
while (count-- > 0) |
5586 |
|
{ |
5587 |
|
offset = GET2(slot, 0) << 1; |
5588 |
|
GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset)); |
5589 |
|
add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0)); |
5590 |
|
slot += common->name_entry_size; |
5591 |
|
} |
5592 |
|
|
5593 |
|
offset = GET2(slot, 0) << 1; |
5594 |
|
GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset)); |
5595 |
|
if (backtracks != NULL && !common->jscript_compat) |
5596 |
|
add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0)); |
5597 |
|
|
5598 |
|
set_jumps(found, LABEL()); |
5599 |
|
} |
5600 |
|
|
5601 |
|
static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail) |
5602 |
{ |
{ |
5603 |
DEFINE_COMPILER; |
DEFINE_COMPILER; |
5604 |
int offset = GET2(cc, 1) << 1; |
BOOL ref = (*cc == OP_REF || *cc == OP_REFI); |
5605 |
|
int offset = 0; |
5606 |
struct sljit_jump *jump = NULL; |
struct sljit_jump *jump = NULL; |
5607 |
struct sljit_jump *partial; |
struct sljit_jump *partial; |
5608 |
struct sljit_jump *nopartial; |
struct sljit_jump *nopartial; |
5609 |
|
|
5610 |
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); |
if (ref) |
5611 |
/* OVECTOR(1) contains the "string begin - 1" constant. */ |
{ |
5612 |
if (withchecks && !common->jscript_compat) |
offset = GET2(cc, 1) << 1; |
5613 |
add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); |
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); |
5614 |
|
/* OVECTOR(1) contains the "string begin - 1" constant. */ |
5615 |
|
if (withchecks && !common->jscript_compat) |
5616 |
|
add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); |
5617 |
|
} |
5618 |
|
else |
5619 |
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); |
5620 |
|
|
5621 |
#if defined SUPPORT_UTF && defined SUPPORT_UCP |
#if defined SUPPORT_UTF && defined SUPPORT_UCP |
5622 |
if (common->utf && *cc == OP_REFI) |
if (common->utf && *cc == OP_REFI) |
5623 |
{ |
{ |
5624 |
SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3); |
SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3); |
5625 |
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); |
if (ref) |
5626 |
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); |
5627 |
|
else |
5628 |
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); |
5629 |
|
|
5630 |
if (withchecks) |
if (withchecks) |
5631 |
jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0); |
jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0); |
5632 |
|
|
5651 |
else |
else |
5652 |
#endif /* SUPPORT_UTF && SUPPORT_UCP */ |
#endif /* SUPPORT_UTF && SUPPORT_UCP */ |
5653 |
{ |
{ |
5654 |
OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0); |
if (ref) |
5655 |
|
OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0); |
5656 |
|
else |
5657 |
|
OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0); |
5658 |
|
|
5659 |
if (withchecks) |
if (withchecks) |
5660 |
jump = JUMP(SLJIT_C_ZERO); |
jump = JUMP(SLJIT_C_ZERO); |
5661 |
|
|
5692 |
else |
else |
5693 |
JUMPHERE(jump); |
JUMPHERE(jump); |
5694 |
} |
} |
|
return cc + 1 + IMM2_SIZE; |
|
5695 |
} |
} |
5696 |
|
|
5697 |
static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) |
static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) |
5698 |
{ |
{ |
5699 |
DEFINE_COMPILER; |
DEFINE_COMPILER; |
5700 |
|
BOOL ref = (*cc == OP_REF || *cc == OP_REFI); |
5701 |
backtrack_common *backtrack; |
backtrack_common *backtrack; |
5702 |
pcre_uchar type; |
pcre_uchar type; |
5703 |
|
int offset = 0; |
5704 |
struct sljit_label *label; |
struct sljit_label *label; |
5705 |
struct sljit_jump *zerolength; |
struct sljit_jump *zerolength; |
5706 |
struct sljit_jump *jump = NULL; |
struct sljit_jump *jump = NULL; |
5710 |
|
|
5711 |
PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL); |
PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL); |
5712 |
|
|
5713 |
|
if (ref) |
5714 |
|
offset = GET2(cc, 1) << 1; |
5715 |
|
else |
5716 |
|
cc += IMM2_SIZE; |
5717 |
type = cc[1 + IMM2_SIZE]; |
type = cc[1 + IMM2_SIZE]; |
5718 |
|
|
5719 |
|
SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even); |
5720 |
minimize = (type & 0x1) != 0; |
minimize = (type & 0x1) != 0; |
5721 |
switch(type) |
switch(type) |
5722 |
{ |
{ |
5754 |
if (min == 0) |
if (min == 0) |
5755 |
{ |
{ |
5756 |
allocate_stack(common, 2); |
allocate_stack(common, 2); |
5757 |
|
if (ref) |
5758 |
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); |
5759 |
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); |
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); |
5760 |
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); |
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); |
5761 |
/* Temporary release of STR_PTR. */ |
/* Temporary release of STR_PTR. */ |
5762 |
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); |
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); |
5763 |
zerolength = compile_ref_checks(common, ccbegin, NULL); |
/* Handles both invalid and empty cases. Since the minimum repeat, |
5764 |
|
is zero the invalid case is basically the same as an empty case. */ |
5765 |
|
if (ref) |
5766 |
|
zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); |
5767 |
|
else |
5768 |
|
{ |
5769 |
|
compile_dnref_search(common, ccbegin, NULL); |
5770 |
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); |
5771 |
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0); |
5772 |
|
zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); |
5773 |
|
} |
5774 |
/* Restore if not zero length. */ |
/* Restore if not zero length. */ |
5775 |
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); |
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); |
5776 |
} |
} |
5777 |
else |
else |
5778 |
{ |
{ |
5779 |
allocate_stack(common, 1); |
allocate_stack(common, 1); |
5780 |
|
if (ref) |
5781 |
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); |
5782 |
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); |
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); |
5783 |
zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks); |
if (ref) |
5784 |
|
{ |
5785 |
|
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); |
5786 |
|
zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); |
5787 |
|
} |
5788 |
|
else |
5789 |
|
{ |
5790 |
|
compile_dnref_search(common, ccbegin, &backtrack->topbacktracks); |
5791 |
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); |
5792 |
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0); |
5793 |
|
zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); |
5794 |
|
} |
5795 |
} |
} |
5796 |
|
|
5797 |
if (min > 1 || max > 1) |
if (min > 1 || max > 1) |
5798 |
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0); |
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0); |
5799 |
|
|
5800 |
label = LABEL(); |
label = LABEL(); |
5801 |
|
if (!ref) |
5802 |
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1); |
5803 |
compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE); |
compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE); |
5804 |
|
|
5805 |
if (min > 1 || max > 1) |
if (min > 1 || max > 1) |
5834 |
return cc; |
return cc; |
5835 |
} |
} |
5836 |
|
|
5837 |
allocate_stack(common, 2); |
allocate_stack(common, ref ? 2 : 3); |
5838 |
|
if (ref) |
5839 |
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); |
5840 |
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); |
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); |
5841 |
if (type != OP_CRMINSTAR) |
if (type != OP_CRMINSTAR) |
5842 |
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); |
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); |
5843 |
|
|
5844 |
if (min == 0) |
if (min == 0) |
5845 |
{ |
{ |
5846 |
zerolength = compile_ref_checks(common, ccbegin, NULL); |
/* Handles both invalid and empty cases. Since the minimum repeat, |
5847 |
|
is zero the invalid case is basically the same as an empty case. */ |
5848 |
|
if (ref) |
5849 |
|
zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); |
5850 |
|
else |
5851 |
|
{ |
5852 |
|
compile_dnref_search(common, ccbegin, NULL); |
5853 |
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); |
5854 |
|
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0); |
5855 |
|
zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); |
5856 |
|
} |
5857 |
|
/* Length is non-zero, we can match real repeats. */ |
5858 |
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); |
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); |
5859 |
jump = JUMP(SLJIT_JUMP); |
jump = JUMP(SLJIT_JUMP); |
5860 |
} |
} |
5861 |
else |
else |
5862 |
zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks); |
{ |
5863 |
|
if (ref) |
5864 |
|
{ |
5865 |
|
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); |
5866 |
|
zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); |
5867 |
|
} |
5868 |
|
else |
5869 |
|
{ |
5870 |
|
compile_dnref_search(common, ccbegin, &backtrack->topbacktracks); |
5871 |
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); |
5872 |
|
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0); |
5873 |
|
zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); |
5874 |
|
} |
5875 |
|
} |
5876 |
|
|
5877 |
BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); |
BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); |
5878 |
if (max > 0) |
if (max > 0) |
5879 |
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max)); |
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max)); |
5880 |
|
|
5881 |
|
if (!ref) |
5882 |
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); |
5883 |
compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE); |
compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE); |
5884 |
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); |
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); |
5885 |
|
|
6467 |
return cc + 1 + LINK_SIZE; |
return cc + 1 + LINK_SIZE; |
6468 |
} |
} |
6469 |
|
|
|
static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table) |
|
|
{ |
|
|
int condition = FALSE; |
|
|
pcre_uchar *slotA = name_table; |
|
|
pcre_uchar *slotB; |
|
|
sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)]; |
|
|
sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)]; |
|
|
sljit_sw no_capture; |
|
|
int i; |
|
|
|
|
|
locals += refno & 0xff; |
|
|
refno >>= 8; |
|
|
no_capture = locals[1]; |
|
|
|
|
|
for (i = 0; i < name_count; i++) |
|
|
{ |
|
|
if (GET2(slotA, 0) == refno) break; |
|
|
slotA += name_entry_size; |
|
|
} |
|
|
|
|
|
if (i < name_count) |
|
|
{ |
|
|
/* Found a name for the number - there can be only one; duplicate names |
|
|
for different numbers are allowed, but not vice versa. First scan down |
|
|
for duplicates. */ |
|
|
|
|
|
slotB = slotA; |
|
|
while (slotB > name_table) |
|
|
{ |
|
|
slotB -= name_entry_size; |
|
|
if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) |
|
|
{ |
|
|
condition = locals[GET2(slotB, 0) << 1] != no_capture; |
|
|
if (condition) break; |
|
|
} |
|
|
else break; |
|
|
} |
|
|
|
|
|
/* Scan up for duplicates */ |
|
|
if (!condition) |
|
|
{ |
|
|
slotB = slotA; |
|
|
for (i++; i < name_count; i++) |
|
|
{ |
|
|
slotB += name_entry_size; |
|
|
if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) |
|
|
{ |
|
|
condition = locals[GET2(slotB, 0) << 1] != no_capture; |
|
|
if (condition) break; |
|
|
} |
|
|
else break; |
|
|
} |
|
|
} |
|
|
} |
|
|
return condition; |
|
|
} |
|
|
|
|
|
static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table) |
|
|
{ |
|
|
int condition = FALSE; |
|
|
pcre_uchar *slotA = name_table; |
|
|
pcre_uchar *slotB; |
|
|
sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)]; |
|
|
sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)]; |
|
|
sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)]; |
|
|
sljit_uw i; |
|
|
|
|
|
for (i = 0; i < name_count; i++) |
|
|
{ |
|
|
if (GET2(slotA, 0) == recno) break; |
|
|
slotA += name_entry_size; |
|
|
} |
|
|
|
|
|
if (i < name_count) |
|
|
{ |
|
|
/* Found a name for the number - there can be only one; duplicate |
|
|
names for different numbers are allowed, but not vice versa. First |
|
|
scan down for duplicates. */ |
|
|
|
|
|
slotB = slotA; |
|
|
while (slotB > name_table) |
|
|
{ |
|
|
slotB -= name_entry_size; |
|
|
if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) |
|
|
{ |
|
|
condition = GET2(slotB, 0) == group_num; |
|
|
if (condition) break; |
|
|
} |
|
|
else break; |
|
|
} |
|
|
|
|
|
/* Scan up for duplicates */ |
|
|
if (!condition) |
|
|
{ |
|
|
slotB = slotA; |
|
|
for (i++; i < name_count; i++) |
|
|
{ |
|
|
slotB += name_entry_size; |
|
|
if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) |
|
|
{ |
|
|
condition = GET2(slotB, 0) == group_num; |
|
|
if (condition) break; |
|
|
} |
|
|
else break; |
|
|
} |
|
|
} |
|
|
} |
|
|
return condition; |
|
|
} |
|
|
|
|
6470 |
static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head) |
static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head) |
6471 |
{ |
{ |
6472 |
DEFINE_COMPILER; |
DEFINE_COMPILER; |
6599 |
pcre_uchar opcode; |
pcre_uchar opcode; |
6600 |
int private_data_ptr = 0; |
int private_data_ptr = 0; |
6601 |
int offset = 0; |
int offset = 0; |
6602 |
int stacksize; |
int i, stacksize; |
6603 |
int repeat_ptr = 0, repeat_length = 0; |
int repeat_ptr = 0, repeat_length = 0; |
6604 |
int repeat_type = 0, repeat_count = 0; |
int repeat_type = 0, repeat_count = 0; |
6605 |
pcre_uchar *ccbegin; |
pcre_uchar *ccbegin; |
6606 |
pcre_uchar *matchingpath; |
pcre_uchar *matchingpath; |
6607 |
|
pcre_uchar *slot; |
6608 |
pcre_uchar bra = OP_BRA; |
pcre_uchar bra = OP_BRA; |
6609 |
pcre_uchar ket; |
pcre_uchar ket; |
6610 |
assert_backtrack *assert; |
assert_backtrack *assert; |
6654 |
cc += GET(cc, 1); |
cc += GET(cc, 1); |
6655 |
|
|
6656 |
has_alternatives = *cc == OP_ALT; |
has_alternatives = *cc == OP_ALT; |
6657 |
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) |
if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND)) |
6658 |
{ |
has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) ? FALSE : TRUE; |
|
has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE; |
|
|
if (*matchingpath == OP_NRREF) |
|
|
{ |
|
|
stacksize = GET2(matchingpath, 1); |
|
|
if (common->currententry == NULL || stacksize == RREF_ANY) |
|
|
has_alternatives = FALSE; |
|
|
else if (common->currententry->start == 0) |
|
|
has_alternatives = stacksize != 0; |
|
|
else |
|
|
has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE); |
|
|
} |
|
|
} |
|
6659 |
|
|
6660 |
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN)) |
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN)) |
6661 |
opcode = OP_SCOND; |
opcode = OP_SCOND; |
6892 |
CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); |
CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); |
6893 |
matchingpath += 1 + IMM2_SIZE; |
matchingpath += 1 + IMM2_SIZE; |
6894 |
} |
} |
6895 |
else if (*matchingpath == OP_NCREF) |
else if (*matchingpath == OP_DNCREF) |
6896 |
{ |
{ |
6897 |
SLJIT_ASSERT(has_alternatives); |
SLJIT_ASSERT(has_alternatives); |
|
stacksize = GET2(matchingpath, 1); |
|
|
jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); |
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0); |
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count); |
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size); |
|
|
OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw))); |
|
|
GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0); |
|
|
OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table); |
|
|
sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector)); |
|
|
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1); |
|
|
add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0)); |
|
6898 |
|
|
6899 |
JUMPHERE(jump); |
i = GET2(matchingpath, 1 + IMM2_SIZE); |
6900 |
matchingpath += 1 + IMM2_SIZE; |
slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size; |
6901 |
|
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); |
6902 |
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); |
6903 |
|
OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0); |
6904 |
|
slot += common->name_entry_size; |
6905 |
|
i--; |
6906 |
|
while (i-- > 0) |
6907 |
|
{ |
6908 |
|
OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0); |
6909 |
|
OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0); |
6910 |
|
slot += common->name_entry_size; |
6911 |
|
} |
6912 |
|
OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); |
6913 |
|
add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_C_ZERO)); |
6914 |
|
matchingpath += 1 + 2 * IMM2_SIZE; |
6915 |
} |
} |
6916 |
else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF) |
else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) |
6917 |
{ |
{ |
6918 |
/* Never has other case. */ |
/* Never has other case. */ |
6919 |
BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL; |
BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL; |
6920 |
|
SLJIT_ASSERT(!has_alternatives); |
6921 |
|
|
6922 |
stacksize = GET2(matchingpath, 1); |
if (*matchingpath == OP_RREF) |
|
if (common->currententry == NULL) |
|
|
stacksize = 0; |
|
|
else if (stacksize == RREF_ANY) |
|
|
stacksize = 1; |
|
|
else if (common->currententry->start == 0) |
|
|
stacksize = stacksize == 0; |
|
|
else |
|
|
stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE); |
|
|
|
|
|
if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL) |
|
6923 |
{ |
{ |
6924 |
SLJIT_ASSERT(!has_alternatives); |
stacksize = GET2(matchingpath, 1); |
6925 |
|
if (common->currententry == NULL) |
6926 |
|
stacksize = 0; |
6927 |
|
else if (stacksize == RREF_ANY) |
6928 |
|
stacksize = 1; |
6929 |
|
else if (common->currententry->start == 0) |
6930 |
|
stacksize = stacksize == 0; |
6931 |
|
else |
6932 |
|
stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE); |
6933 |
|
|
6934 |
if (stacksize != 0) |
if (stacksize != 0) |
6935 |
matchingpath += 1 + IMM2_SIZE; |
matchingpath += 1 + IMM2_SIZE; |
6936 |
|
} |
6937 |
|
else |
6938 |
|
{ |
6939 |
|
if (common->currententry == NULL || common->currententry->start == 0) |
6940 |
|
stacksize = 0; |
6941 |
else |
else |
6942 |
{ |
{ |
6943 |
|
stacksize = GET2(matchingpath, 1 + IMM2_SIZE); |
6944 |
|
slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size; |
6945 |
|
i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE); |
6946 |
|
while (stacksize > 0) |
6947 |
|
{ |
6948 |
|
if ((int)GET2(slot, 0) == i) |
6949 |
|
break; |
6950 |
|
slot += common->name_entry_size; |
6951 |
|
stacksize--; |
6952 |
|
} |
6953 |
|
} |
6954 |
|
|
6955 |
|
if (stacksize != 0) |
6956 |
|
matchingpath += 1 + 2 * IMM2_SIZE; |
6957 |
|
} |
6958 |
|
|
6959 |
|
/* The stacksize == 0 is a common "else" case. */ |
6960 |
|
if (stacksize == 0) |
6961 |
|
{ |
6962 |
if (*cc == OP_ALT) |
if (*cc == OP_ALT) |
6963 |
{ |
{ |
6964 |
matchingpath = cc + 1 + LINK_SIZE; |
matchingpath = cc + 1 + LINK_SIZE; |
6967 |
else |
else |
6968 |
matchingpath = cc; |
matchingpath = cc; |
6969 |
} |
} |
|
} |
|
|
else |
|
|
{ |
|
|
SLJIT_ASSERT(has_alternatives); |
|
|
|
|
|
stacksize = GET2(matchingpath, 1); |
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0); |
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count); |
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size); |
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE)); |
|
|
OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize); |
|
|
GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0); |
|
|
OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table); |
|
|
sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups)); |
|
|
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1); |
|
|
add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0)); |
|
|
matchingpath += 1 + IMM2_SIZE; |
|
|
} |
|
6970 |
} |
} |
6971 |
else |
else |
6972 |
{ |
{ |
7408 |
return cc + 1 + LINK_SIZE; |
return cc + 1 + LINK_SIZE; |
7409 |
} |
} |
7410 |
|
|
7411 |
static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end) |
static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *max, int *min, pcre_uchar **end) |
7412 |
{ |
{ |
7413 |
int class_len; |
int class_len; |
7414 |
|
|
7444 |
} |
} |
7445 |
else |
else |
7446 |
{ |
{ |
7447 |
SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS); |
SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS); |
7448 |
*type = *opcode; |
*type = *opcode; |
7449 |
cc++; |
cc++; |
7450 |
class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0); |
class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0); |
7455 |
if (end != NULL) |
if (end != NULL) |
7456 |
*end = cc + class_len; |
*end = cc + class_len; |
7457 |
} |
} |
7458 |
|
else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY) |
7459 |
|
{ |
7460 |
|
*opcode -= OP_CRPOSSTAR - OP_POSSTAR; |
7461 |
|
if (end != NULL) |
7462 |
|
*end = cc + class_len; |
7463 |
|
} |
7464 |
else |
else |
7465 |
{ |
{ |
7466 |
SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE); |
SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE); |
7467 |
*arg1 = GET2(cc, (class_len + IMM2_SIZE)); |
*max = GET2(cc, (class_len + IMM2_SIZE)); |
7468 |
*arg2 = GET2(cc, class_len); |
*min = GET2(cc, class_len); |
7469 |
|
|
7470 |
if (*arg2 == 0) |
if (*min == 0) |
7471 |
{ |
{ |
7472 |
SLJIT_ASSERT(*arg1 != 0); |
SLJIT_ASSERT(*max != 0); |
7473 |
*opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO; |
*opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : (*opcode == OP_CRMINRANGE ? OP_MINUPTO : OP_POSUPTO); |
7474 |
} |
} |
7475 |
if (*arg1 == *arg2) |
if (*max == *min) |
7476 |
*opcode = OP_EXACT; |
*opcode = OP_EXACT; |
7477 |
|
|
7478 |
if (end != NULL) |
if (end != NULL) |
7483 |
|
|
7484 |
if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO) |
if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO) |
7485 |
{ |
{ |
7486 |
*arg1 = GET2(cc, 0); |
*max = GET2(cc, 0); |
7487 |
cc += IMM2_SIZE; |
cc += IMM2_SIZE; |
7488 |
} |
} |
7489 |
|
|
7512 |
backtrack_common *backtrack; |
backtrack_common *backtrack; |
7513 |
pcre_uchar opcode; |
pcre_uchar opcode; |
7514 |
pcre_uchar type; |
pcre_uchar type; |
7515 |
int arg1 = -1, arg2 = -1; |
int max = -1, min = -1; |
7516 |
pcre_uchar* end; |
pcre_uchar* end; |
7517 |
jump_list *nomatch = NULL; |
jump_list *nomatch = NULL; |
7518 |
struct sljit_jump *jump = NULL; |
struct sljit_jump *jump = NULL; |
7525 |
|
|
7526 |
PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL); |
PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL); |
7527 |
|
|
7528 |
cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end); |
cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, &end); |
7529 |
|
|
7530 |
switch(type) |
switch(type) |
7531 |
{ |
{ |
7596 |
{ |
{ |
7597 |
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0); |
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0); |
7598 |
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); |
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); |
7599 |
if (opcode == OP_CRRANGE && arg2 > 0) |
if (opcode == OP_CRRANGE && min > 0) |
7600 |
CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label); |
CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label); |
7601 |
if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0)) |
if (opcode == OP_UPTO || (opcode == OP_CRRANGE && max > 0)) |
7602 |
jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1); |
jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max); |
7603 |
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0); |
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0); |
7604 |
} |
} |
7605 |
|
|
7626 |
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); |
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); |
7627 |
if (opcode <= OP_PLUS) |
if (opcode <= OP_PLUS) |
7628 |
JUMPTO(SLJIT_JUMP, label); |
JUMPTO(SLJIT_JUMP, label); |
7629 |
else if (opcode == OP_CRRANGE && arg1 == 0) |
else if (opcode == OP_CRRANGE && max == 0) |
7630 |
{ |
{ |
7631 |
OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1); |
OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1); |
7632 |
JUMPTO(SLJIT_JUMP, label); |
JUMPTO(SLJIT_JUMP, label); |
7636 |
OP1(SLJIT_MOV, TMP1, 0, base, offset1); |
OP1(SLJIT_MOV, TMP1, 0, base, offset1); |
7637 |
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); |
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); |
7638 |
OP1(SLJIT_MOV, base, offset1, TMP1, 0); |
OP1(SLJIT_MOV, base, offset1, TMP1, 0); |
7639 |
CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label); |
CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, max + 1, label); |
7640 |
} |
} |
7641 |
set_jumps(nomatch, LABEL()); |
set_jumps(nomatch, LABEL()); |
7642 |
if (opcode == OP_CRRANGE) |
if (opcode == OP_CRRANGE) |
7643 |
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1)); |
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, min + 1)); |
7644 |
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); |
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); |
7645 |
} |
} |
7646 |
BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); |
BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); |
7678 |
break; |
break; |
7679 |
|
|
7680 |
case OP_EXACT: |
case OP_EXACT: |
7681 |
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1); |
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); |
7682 |
label = LABEL(); |
label = LABEL(); |
7683 |
compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); |
compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); |
7684 |
OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); |
OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); |
7691 |
if (opcode == OP_POSPLUS) |
if (opcode == OP_POSPLUS) |
7692 |
compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); |
compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); |
7693 |
if (opcode == OP_POSUPTO) |
if (opcode == OP_POSUPTO) |
7694 |
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1); |
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, max); |
7695 |
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); |
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); |
7696 |
label = LABEL(); |
label = LABEL(); |
7697 |
compile_char1_matchingpath(common, type, cc, &nomatch); |
compile_char1_matchingpath(common, type, cc, &nomatch); |
7715 |
OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); |
OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); |
7716 |
break; |
break; |
7717 |
|
|
7718 |
|
case OP_CRPOSRANGE: |
7719 |
|
/* Combination of OP_EXACT and OP_POSSTAR or OP_POSUPTO */ |
7720 |
|
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, min); |
7721 |
|
label = LABEL(); |
7722 |
|
compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); |
7723 |
|
OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); |
7724 |
|
JUMPTO(SLJIT_C_NOT_ZERO, label); |
7725 |
|
|
7726 |
|
if (max != 0) |
7727 |
|
{ |
7728 |
|
SLJIT_ASSERT(max - min > 0); |
7729 |
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, max - min); |
7730 |
|
} |
7731 |
|
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); |
7732 |
|
label = LABEL(); |
7733 |
|
compile_char1_matchingpath(common, type, cc, &nomatch); |
7734 |
|
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); |
7735 |
|
if (max == 0) |
7736 |
|
JUMPTO(SLJIT_JUMP, label); |
7737 |
|
else |
7738 |
|
{ |
7739 |
|
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1); |
7740 |
|
JUMPTO(SLJIT_C_NOT_ZERO, label); |
7741 |
|
} |
7742 |
|
set_jumps(nomatch, LABEL()); |
7743 |
|
OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); |
7744 |
|
break; |
7745 |
|
|
7746 |
default: |
default: |
7747 |
SLJIT_ASSERT_STOP(); |
SLJIT_ASSERT_STOP(); |
7748 |
break; |
break; |
8020 |
|
|
8021 |
case OP_CLASS: |
case OP_CLASS: |
8022 |
case OP_NCLASS: |
case OP_NCLASS: |
8023 |
if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE) |
if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE) |
8024 |
cc = compile_iterator_matchingpath(common, cc, parent); |
cc = compile_iterator_matchingpath(common, cc, parent); |
8025 |
else |
else |
8026 |
cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); |
cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); |
8028 |
|
|
8029 |
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
8030 |
case OP_XCLASS: |
case OP_XCLASS: |
8031 |
if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE) |
if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE) |
8032 |
cc = compile_iterator_matchingpath(common, cc, parent); |
cc = compile_iterator_matchingpath(common, cc, parent); |
8033 |
else |
else |
8034 |
cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); |
cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); |
8037 |
|
|
8038 |
case OP_REF: |
case OP_REF: |
8039 |
case OP_REFI: |
case OP_REFI: |
8040 |
if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE) |
if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE) |
8041 |
|
cc = compile_ref_iterator_matchingpath(common, cc, parent); |
8042 |
|
else |
8043 |
|
{ |
8044 |
|
compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE); |
8045 |
|
cc += 1 + IMM2_SIZE; |
8046 |
|
} |
8047 |
|
break; |
8048 |
|
|
8049 |
|
case OP_DNREF: |
8050 |
|
case OP_DNREFI: |
8051 |
|
if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE) |
8052 |
cc = compile_ref_iterator_matchingpath(common, cc, parent); |
cc = compile_ref_iterator_matchingpath(common, cc, parent); |
8053 |
else |
else |
8054 |
cc = compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE); |
{ |
8055 |
|
compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); |
8056 |
|
compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE); |
8057 |
|
cc += 1 + 2 * IMM2_SIZE; |
8058 |
|
} |
8059 |
break; |
break; |
8060 |
|
|
8061 |
case OP_RECURSE: |
case OP_RECURSE: |
8208 |
pcre_uchar *cc = current->cc; |
pcre_uchar *cc = current->cc; |
8209 |
pcre_uchar opcode; |
pcre_uchar opcode; |
8210 |
pcre_uchar type; |
pcre_uchar type; |
8211 |
int arg1 = -1, arg2 = -1; |
int max = -1, min = -1; |
8212 |
struct sljit_label *label = NULL; |
struct sljit_label *label = NULL; |
8213 |
struct sljit_jump *jump = NULL; |
struct sljit_jump *jump = NULL; |
8214 |
jump_list *jumplist = NULL; |
jump_list *jumplist = NULL; |
8217 |
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; |
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; |
8218 |
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw); |
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw); |
8219 |
|
|
8220 |
cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL); |
cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, NULL); |
8221 |
|
|
8222 |
switch(opcode) |
switch(opcode) |
8223 |
{ |
{ |
8236 |
else |
else |
8237 |
{ |
{ |
8238 |
if (opcode == OP_UPTO) |
if (opcode == OP_UPTO) |
8239 |
arg2 = 0; |
min = 0; |
8240 |
if (opcode <= OP_PLUS) |
if (opcode <= OP_PLUS) |
8241 |
{ |
{ |
8242 |
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); |
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); |
8246 |
{ |
{ |
8247 |
OP1(SLJIT_MOV, TMP1, 0, base, offset1); |
OP1(SLJIT_MOV, TMP1, 0, base, offset1); |
8248 |
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); |
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); |
8249 |
jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, arg2 + 1); |
jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, min + 1); |
8250 |
OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1); |
OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1); |
8251 |
} |
} |
8252 |
skip_char_back(common); |
skip_char_back(common); |
8291 |
OP1(SLJIT_MOV, base, offset1, TMP1, 0); |
OP1(SLJIT_MOV, base, offset1, TMP1, 0); |
8292 |
|
|
8293 |
if (opcode == OP_CRMINRANGE) |
if (opcode == OP_CRMINRANGE) |
8294 |
CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label); |
CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min + 1, label); |
8295 |
|
|
8296 |
if (opcode == OP_CRMINRANGE && arg1 == 0) |
if (opcode == OP_CRMINRANGE && max == 0) |
8297 |
JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); |
JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); |
8298 |
else |
else |
8299 |
CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_backtrack)->matchingpath); |
CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, max + 2, CURRENT_AS(iterator_backtrack)->matchingpath); |
8300 |
|
|
8301 |
set_jumps(jumplist, LABEL()); |
set_jumps(jumplist, LABEL()); |
8302 |
if (private_data_ptr == 0) |
if (private_data_ptr == 0) |
8331 |
|
|
8332 |
case OP_EXACT: |
case OP_EXACT: |
8333 |
case OP_POSPLUS: |
case OP_POSPLUS: |
8334 |
|
case OP_CRPOSRANGE: |
8335 |
set_jumps(current->topbacktracks, LABEL()); |
set_jumps(current->topbacktracks, LABEL()); |
8336 |
break; |
break; |
8337 |
|
|
8350 |
{ |
{ |
8351 |
DEFINE_COMPILER; |
DEFINE_COMPILER; |
8352 |
pcre_uchar *cc = current->cc; |
pcre_uchar *cc = current->cc; |
8353 |
|
BOOL ref = (*cc == OP_REF || *cc == OP_REFI); |
8354 |
pcre_uchar type; |
pcre_uchar type; |
8355 |
|
|
8356 |
type = cc[1 + IMM2_SIZE]; |
type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE]; |
8357 |
|
|
8358 |
if ((type & 0x1) == 0) |
if ((type & 0x1) == 0) |
8359 |
{ |
{ |
8360 |
|
/* Maximize case. */ |
8361 |
set_jumps(current->topbacktracks, LABEL()); |
set_jumps(current->topbacktracks, LABEL()); |
8362 |
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
8363 |
free_stack(common, 1); |
free_stack(common, 1); |
8368 |
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
8369 |
CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath); |
CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath); |
8370 |
set_jumps(current->topbacktracks, LABEL()); |
set_jumps(current->topbacktracks, LABEL()); |
8371 |
free_stack(common, 2); |
free_stack(common, ref ? 2 : 3); |
8372 |
} |
} |
8373 |
|
|
8374 |
static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current) |
static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current) |
8732 |
return; |
return; |
8733 |
} |
} |
8734 |
|
|
8735 |
/* Instructions after the current alternative is succesfully matched. */ |
/* Instructions after the current alternative is successfully matched. */ |
8736 |
/* There is a similar code in compile_bracket_matchingpath. */ |
/* There is a similar code in compile_bracket_matchingpath. */ |
8737 |
if (opcode == OP_ONCE) |
if (opcode == OP_ONCE) |
8738 |
match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head); |
match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head); |
9169 |
|
|
9170 |
case OP_REF: |
case OP_REF: |
9171 |
case OP_REFI: |
case OP_REFI: |
9172 |
|
case OP_DNREF: |
9173 |
|
case OP_DNREFI: |
9174 |
compile_ref_iterator_backtrackingpath(common, current); |
compile_ref_iterator_backtrackingpath(common, current); |
9175 |
break; |
break; |
9176 |
|
|
9464 |
} |
} |
9465 |
common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; |
common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; |
9466 |
common->ctypes = (sljit_sw)(tables + ctypes_offset); |
common->ctypes = (sljit_sw)(tables + ctypes_offset); |
9467 |
common->digits[0] = -2; |
common->name_table = ((pcre_uchar *)re) + re->name_table_offset; |
|
common->name_table = (sljit_sw)((pcre_uchar *)re + re->name_table_offset); |
|
9468 |
common->name_count = re->name_count; |
common->name_count = re->name_count; |
9469 |
common->name_entry_size = re->name_entry_size; |
common->name_entry_size = re->name_entry_size; |
9470 |
common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; |
common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; |
9637 |
else if ((re->flags & PCRE_STARTLINE) != 0) |
else if ((re->flags & PCRE_STARTLINE) != 0) |
9638 |
fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0); |
fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0); |
9639 |
else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0) |
else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0) |
9640 |
fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0); |
fast_forward_start_bits(common, study->start_bits, (re->options & PCRE_FIRSTLINE) != 0); |
9641 |
} |
} |
9642 |
} |
} |
9643 |
else |
else |
9883 |
} |
} |
9884 |
#endif /* !COMPILE_PCRE32 */ |
#endif /* !COMPILE_PCRE32 */ |
9885 |
#ifdef COMPILE_PCRE8 |
#ifdef COMPILE_PCRE8 |
9886 |
|
if (common->utfreadchar11 != NULL) |
9887 |
|
{ |
9888 |
|
set_jumps(common->utfreadchar11, LABEL()); |
9889 |
|
do_utfreadchar11(common); |
9890 |
|
} |
9891 |
if (common->utfreadtype8 != NULL) |
if (common->utfreadtype8 != NULL) |
9892 |
{ |
{ |
9893 |
set_jumps(common->utfreadtype8, LABEL()); |
set_jumps(common->utfreadtype8, LABEL()); |
10207 |
} |
} |
10208 |
} |
} |
10209 |
|
|
10210 |
|
#if defined COMPILE_PCRE8 |
10211 |
|
PCRE_EXP_DECL void |
10212 |
|
pcre_jit_free_unused_memory(void) |
10213 |
|
#elif defined COMPILE_PCRE16 |
10214 |
|
PCRE_EXP_DECL void |
10215 |
|
pcre16_jit_free_unused_memory(void) |
10216 |
|
#elif defined COMPILE_PCRE32 |
10217 |
|
PCRE_EXP_DECL void |
10218 |
|
pcre32_jit_free_unused_memory(void) |
10219 |
|
#endif |
10220 |
|
{ |
10221 |
|
sljit_free_unused_memory_exec(); |
10222 |
|
} |
10223 |
|
|
10224 |
#else /* SUPPORT_JIT */ |
#else /* SUPPORT_JIT */ |
10225 |
|
|
10226 |
/* These are dummy functions to avoid linking errors when JIT support is not |
/* These are dummy functions to avoid linking errors when JIT support is not |
10272 |
(void)userdata; |
(void)userdata; |
10273 |
} |
} |
10274 |
|
|
10275 |
|
#if defined COMPILE_PCRE8 |
10276 |
|
PCRE_EXP_DECL void |
10277 |
|
pcre_jit_free_unused_memory(void) |
10278 |
|
#elif defined COMPILE_PCRE16 |
10279 |
|
PCRE_EXP_DECL void |
10280 |
|
pcre16_jit_free_unused_memory(void) |
10281 |
|
#elif defined COMPILE_PCRE32 |
10282 |
|
PCRE_EXP_DECL void |
10283 |
|
pcre32_jit_free_unused_memory(void) |
10284 |
|
#endif |
10285 |
|
{ |
10286 |
|
} |
10287 |
|
|
10288 |
#endif |
#endif |
10289 |
|
|
10290 |
/* End of pcre_jit_compile.c */ |
/* End of pcre_jit_compile.c */ |