369 |
int endonly; |
int endonly; |
370 |
/* Tables. */ |
/* Tables. */ |
371 |
sljit_sw ctypes; |
sljit_sw ctypes; |
|
int digits[2 + MAX_RANGE_SIZE]; |
|
372 |
/* Named capturing brackets. */ |
/* Named capturing brackets. */ |
373 |
pcre_uchar *name_table; |
pcre_uchar *name_table; |
374 |
sljit_sw name_count; |
sljit_sw name_count; |
407 |
jump_list *utfreadchar; |
jump_list *utfreadchar; |
408 |
#endif |
#endif |
409 |
#ifdef COMPILE_PCRE8 |
#ifdef COMPILE_PCRE8 |
410 |
|
jump_list *utfreadchar8; |
411 |
jump_list *utfreadtype8; |
jump_list *utfreadtype8; |
412 |
#endif |
#endif |
413 |
#endif /* SUPPORT_UTF */ |
#endif /* SUPPORT_UTF */ |
2512 |
#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */ |
#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */ |
2513 |
} |
} |
2514 |
|
|
2515 |
|
/* Emits code that loads the code unit at STR_PTR into TMP1 and moves STR_PTR
forward. When common->utf is set, the COMPILE_PCRE8 variant hands values
>= 0xc0 to the shared utfreadchar8 routine through a fast call, and the
COMPILE_PCRE16 variant conditionally steps STR_PTR over one more code unit.
Nothing here compares STR_PTR against an end pointer. */
static void read_char8(compiler_common *common) |
2516 |

{ |
2517 |

/* Reads the precise value of a character into TMP1, if the character is |
2518 |

less than 256. Otherwise it returns with a value greater than or equal to 256. */ |
2519 |

DEFINE_COMPILER; |
2520 |

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 |
2521 |

struct sljit_jump *jump; |
2522 |

#endif |
2523 |


2524 |

/* Load the code unit at STR_PTR into TMP1, then advance STR_PTR by one code unit. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); |
2525 |

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
2526 |


2527 |

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 |
2528 |

if (common->utf) |
2529 |

{ |
2530 |

#if defined COMPILE_PCRE8 |
2531 |

/* Values below 0xc0 are kept exactly as read: the compare jumps over the
fast call. Anything else is passed to the routine collected in
common->utfreadchar8. */
jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); |
2532 |

add_jump(compiler, &common->utfreadchar8, JUMP(SLJIT_FAST_CALL)); |
2533 |

JUMPHERE(jump); |
2534 |

#elif defined COMPILE_PCRE16 |
2535 |

/* Skip low surrogate if necessary. */ |
2536 |

/* TMP2 = TMP1 - 0xd800. If TMP2 is greater than 0xdc00 - 0xd800 - 1 (0x3ff)
the extra STR_PTR increment is jumped over; otherwise STR_PTR advances by
one more code unit. TMP1 itself is left unchanged. */
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); |
2537 |

jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1); |
2538 |

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
2539 |

JUMPHERE(jump); |
2540 |

#endif /* COMPILE_PCRE[8|16] */ |
2541 |

} |
2542 |

#endif |
2543 |

} |
2544 |
|
|
2545 |
static void read_char8_type(compiler_common *common) |
static void read_char8_type(compiler_common *common) |
2546 |
{ |
{ |
2547 |
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */ |
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */ |
2568 |
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); |
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); |
2569 |
JUMPHERE(jump); |
JUMPHERE(jump); |
2570 |
/* Skip low surrogate if necessary. */ |
/* Skip low surrogate if necessary. */ |
2571 |
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00); |
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800); |
2572 |
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800); |
jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1); |
2573 |
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); |
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
2574 |
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); |
JUMPHERE(jump); |
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
|
2575 |
#elif defined COMPILE_PCRE32 |
#elif defined COMPILE_PCRE32 |
2576 |
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); |
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); |
2577 |
jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); |
jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); |
2715 |
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
2716 |
} |
} |
2717 |
|
|
2718 |
|
/* Emits the shared routine reached through the common->utfreadchar8 jump
list. It is entered with sljit_emit_fast_enter and left with
sljit_emit_fast_return, using RETURN_ADDR for the return address. On entry
TMP1 holds the lead byte; on exit TMP1 holds the result, STR_PTR has been
moved forward, and TMP2 has been overwritten. */
static void do_utfreadchar8(compiler_common *common) |
2719 |

{ |
2720 |

/* Fast decoding of a UTF-8 character. TMP1 contains the first byte |
2721 |

of the character (>= 0xc0). Return value in TMP1. */ |
2722 |

DEFINE_COMPILER; |
2723 |

struct sljit_jump *jump; |
2724 |


2725 |

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); |
2726 |


2727 |

/* Test bit 0x20 of the lead byte. If it is clear, fall through to the
two-byte path; if it is set, continue at the JUMPHERE below. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20); |
2728 |

jump = JUMP(SLJIT_C_NOT_ZERO); |
2729 |

/* Two byte sequence. */ |
2730 |

/* Read the byte at STR_PTR into TMP2 and step over it, then compute
TMP1 = ((TMP1 & 0x1f) << 6) | (TMP2 & 0x3f). */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
2731 |

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
2732 |

OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f); |
2733 |

OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); |
2734 |

OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
2735 |

OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
2736 |

sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
2737 |


2738 |

JUMPHERE(jump); |
2739 |

/* Bit 0x20 is set. TMP2 is loaded from utf8_table4 at index TMP1 - 0xc0 and
added to STR_PTR; presumably this is the number of bytes that follow the
lead byte (TODO: confirm against the table definition). The character is
not decoded: TMP1 is set to the constant 0x800. */
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); |
2740 |

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x800); |
2741 |

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
2742 |

sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
2743 |

} |
2744 |
|
|
2745 |
static void do_utfreadtype8(compiler_common *common) |
static void do_utfreadtype8(compiler_common *common) |
2746 |
{ |
{ |
2747 |
/* Fast decoding a UTF-8 character type. TMP2 contains the first byte |
/* Fast decoding a UTF-8 character type. TMP2 contains the first byte |
2758 |
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
2759 |
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
2760 |
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f); |
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f); |
2761 |
|
/* The upper 5 bits are known at this point. */ |
2762 |
|
compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3); |
2763 |
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); |
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); |
2764 |
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); |
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); |
2765 |
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0); |
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0); |
|
compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); |
|
2766 |
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); |
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); |
2767 |
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
2768 |
|
|
2769 |
JUMPHERE(compare); |
JUMPHERE(compare); |
2770 |
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); |
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); |
2771 |
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
|
JUMPHERE(jump); |
|
2772 |
|
|
2773 |
/* We only have types for characters less than 256. */ |
/* We only have types for characters less than 256. */ |
2774 |
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); |
JUMPHERE(jump); |
2775 |
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); |
2776 |
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); |
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); |
2777 |
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
2778 |
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); |
2779 |
} |
} |
2780 |
|
|
3774 |
static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch) |
static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch) |
3775 |
{ |
{ |
3776 |
DEFINE_COMPILER; |
DEFINE_COMPILER; |
3777 |
|
int offset; |
3778 |
|
|
3779 |
if (ranges[0] < 0 || ranges[0] > 4) |
if (ranges[0] < 0 || ranges[0] > 4) |
3780 |
return FALSE; |
return FALSE; |
3784 |
add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); |
add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); |
3785 |
|
|
3786 |
if (readch) |
if (readch) |
3787 |
read_char(common); |
read_char8(common); |
3788 |
|
|
3789 |
switch(ranges[0]) |
switch(ranges[0]) |
3790 |
{ |
{ |
3848 |
|
|
3849 |
if (ranges[1] != 0) |
if (ranges[1] != 0) |
3850 |
{ |
{ |
3851 |
|
offset = 0; |
3852 |
if (ranges[2] + 1 != ranges[3]) |
if (ranges[2] + 1 != ranges[3]) |
3853 |
{ |
{ |
3854 |
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]); |
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]); |
3855 |
add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2])); |
add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2])); |
3856 |
ranges[4] -= ranges[2]; |
offset = ranges[2]; |
|
ranges[5] -= ranges[2]; |
|
3857 |
} |
} |
3858 |
else |
else |
3859 |
add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2])); |
add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2])); |
3860 |
|
|
3861 |
if (ranges[4] + 1 != ranges[5]) |
if (ranges[4] + 1 != ranges[5]) |
3862 |
{ |
{ |
3863 |
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]); |
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - offset); |
3864 |
add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4])); |
add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4])); |
3865 |
} |
} |
3866 |
else |
else |
3867 |
add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4])); |
add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4] - offset)); |
3868 |
return TRUE; |
return TRUE; |
3869 |
} |
} |
3870 |
|
|
3885 |
} |
} |
3886 |
} |
} |
3887 |
|
|
|
/* Summarises entries 0..255 of the table at common->ctypes as a list of
boundaries for one flag.

  common   supplies the ctypes table address (common->ctypes)
  flag     mask tested against each table entry; the toggle "bit ^= flag"
           tracks the masked value only when flag is a single bit
           (NOTE(review): confirm that callers never pass a multi-bit mask)
  ranges   output array, written as follows:
             ranges[0]    number of boundaries stored, or -1 when more than
                          MAX_RANGE_SIZE boundaries would be needed
             ranges[1]    1 if entry 0 has the flag set, otherwise 0
             ranges[2..]  each index i at which (ctypes[i] & flag) differs
                          from the value for i - 1, in increasing order; a
                          final 256 is appended when the last run has the
                          flag set

On the early-return paths ranges[0] stays -1, and the boundaries written so
far are left in place. */
static void get_ctype_ranges(compiler_common *common, int flag, int *ranges) |


{ |


int i, bit, length; |


const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes; |





bit = ctypes[0] & flag; |


/* Start in the failure state; replaced by the real count at the end. */
ranges[0] = -1; |


ranges[1] = bit != 0 ? 1 : 0; |


length = 0; |





/* Record every index where the masked value changes. */
for (i = 1; i < 256; i++) |


if ((ctypes[i] & flag) != bit) |


{ |


if (length >= MAX_RANGE_SIZE) |


return; |


ranges[2 + length] = i; |


length++; |


bit ^= flag; |


} |





/* The last run has the flag set, so close it with a boundary at 256. */
if (bit != 0) |


{ |


if (length >= MAX_RANGE_SIZE) |


return; |


ranges[2 + length] = 256; |


length++; |


} |


ranges[0] = length; |


} |
|
|
|
|
3888 |
static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks) |
static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks) |
3889 |
{ |
{ |
3890 |
int ranges[2 + MAX_RANGE_SIZE]; |
int ranges[2 + MAX_RANGE_SIZE]; |
4853 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
4854 |
case OP_DIGIT: |
case OP_DIGIT: |
4855 |
/* Digits are usually 0-9, so it is worth to optimize them. */ |
/* Digits are usually 0-9, so it is worth to optimize them. */ |
|
if (common->digits[0] == -2) |
|
|
get_ctype_ranges(common, ctype_digit, common->digits); |
|
4856 |
detect_partial_match(common, backtracks); |
detect_partial_match(common, backtracks); |
4857 |
/* Flip the starting bit in the negative case. */ |
/* Flip the starting bit in the negative case. */ |
4858 |
if (type == OP_NOT_DIGIT) |
read_char8_type(common); |
4859 |
common->digits[1] ^= 1; |
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit); |
4860 |
if (!check_ranges(common, common->digits, backtracks, TRUE)) |
add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); |
|
{ |
|
|
read_char8_type(common); |
|
|
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit); |
|
|
add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); |
|
|
} |
|
|
if (type == OP_NOT_DIGIT) |
|
|
common->digits[1] ^= 1; |
|
4861 |
return cc; |
return cc; |
4862 |
|
|
4863 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
5292 |
case OP_CLASS: |
case OP_CLASS: |
5293 |
case OP_NCLASS: |
case OP_NCLASS: |
5294 |
detect_partial_match(common, backtracks); |
detect_partial_match(common, backtracks); |
5295 |
read_char(common); |
read_char8(common); |
5296 |
if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks)) |
if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks)) |
5297 |
return cc + 32 / sizeof(pcre_uchar); |
return cc + 32 / sizeof(pcre_uchar); |
5298 |
|
|
9354 |
} |
} |
9355 |
common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; |
common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; |
9356 |
common->ctypes = (sljit_sw)(tables + ctypes_offset); |
common->ctypes = (sljit_sw)(tables + ctypes_offset); |
|
common->digits[0] = -2; |
|
9357 |
common->name_table = ((pcre_uchar *)re) + re->name_table_offset; |
common->name_table = ((pcre_uchar *)re) + re->name_table_offset; |
9358 |
common->name_count = re->name_count; |
common->name_count = re->name_count; |
9359 |
common->name_entry_size = re->name_entry_size; |
common->name_entry_size = re->name_entry_size; |
9773 |
} |
} |
9774 |
#endif /* !COMPILE_PCRE32 */ |
#endif /* !COMPILE_PCRE32 */ |
9775 |
#ifdef COMPILE_PCRE8 |
#ifdef COMPILE_PCRE8 |
9776 |
|
if (common->utfreadchar8 != NULL) |
9777 |
|
{ |
9778 |
|
set_jumps(common->utfreadchar8, LABEL()); |
9779 |
|
do_utfreadchar8(common); |
9780 |
|
} |
9781 |
if (common->utfreadtype8 != NULL) |
if (common->utfreadtype8 != NULL) |
9782 |
{ |
{ |
9783 |
set_jumps(common->utfreadtype8, LABEL()); |
set_jumps(common->utfreadtype8, LABEL()); |