54 |
static char rep_min[] = { 0, 0, 1, 1, 0, 0 }; |
static char rep_min[] = { 0, 0, 1, 1, 0, 0 }; |
55 |
static char rep_max[] = { 0, 0, 0, 0, 1, 1 }; |
static char rep_max[] = { 0, 0, 0, 0, 1, 1 }; |
56 |
|
|
57 |
/* Text forms of OP_ values and things, for debugging */ |
/* Text forms of OP_ values and things, for debugging (not all used) */ |
58 |
|
|
59 |
#ifdef DEBUG |
#ifdef DEBUG |
60 |
static const char *OP_names[] = { |
static const char *OP_names[] = { |
65 |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
66 |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
67 |
"*", "*?", "+", "+?", "?", "??", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", |
68 |
"class", "Ref", |
"class", "negclass", "Ref", |
69 |
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once", |
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once", |
70 |
"Brazero", "Braminzero", "Bra" |
"Brazero", "Braminzero", "Bra" |
71 |
}; |
}; |
91 |
|
|
92 |
/* Definition to allow mutual recursion */ |
/* Definition to allow mutual recursion */ |
93 |
|
|
94 |
static BOOL compile_regex(int, int *, uschar **, const uschar **, const char **); |
static BOOL |
95 |
|
compile_regex(int, int *, uschar **, const uschar **, const char **); |
96 |
|
|
97 |
/* Structure for passing "static" information around between the functions |
/* Structure for passing "static" information around between the functions |
98 |
doing the matching, so that they are thread-safe. */ |
doing the matching, so that they are thread-safe. */ |
307 |
/* Check a class or a back reference for a zero minimum */ |
/* Check a class or a back reference for a zero minimum */ |
308 |
|
|
309 |
case OP_CLASS: |
case OP_CLASS: |
310 |
|
case OP_NEGCLASS: |
311 |
case OP_REF: |
case OP_REF: |
312 |
cc += (*cc == OP_REF)? 2 : 33; |
cc += (*cc == OP_REF)? 2 : 33; |
313 |
|
|
672 |
|
|
673 |
case '[': |
case '[': |
674 |
previous = code; |
previous = code; |
|
*code++ = OP_CLASS; |
|
675 |
|
|
676 |
/* If the first character is '^', set the negation flag */ |
/* If the first character is '^', set the negation flag, and use a |
677 |
|
different opcode. This only matters if caseless matching is specified at |
678 |
|
runtime. */ |
679 |
|
|
680 |
if ((c = *(++ptr)) == '^') |
if ((c = *(++ptr)) == '^') |
681 |
{ |
{ |
682 |
negate_class = TRUE; |
negate_class = TRUE; |
683 |
|
*code++ = OP_NEGCLASS; |
684 |
c = *(++ptr); |
c = *(++ptr); |
685 |
} |
} |
686 |
else negate_class = FALSE; |
else |
687 |
|
{ |
688 |
|
negate_class = FALSE; |
689 |
|
*code++ = OP_CLASS; |
690 |
|
} |
691 |
|
|
692 |
/* Keep a count of chars so that we can optimize the case of just a single |
/* Keep a count of chars so that we can optimize the case of just a single |
693 |
character. */ |
character. */ |
1023 |
/* If previous was a character class or a back reference, we put the repeat |
/* If previous was a character class or a back reference, we put the repeat |
1024 |
stuff after it. */ |
stuff after it. */ |
1025 |
|
|
1026 |
else if (*previous == OP_CLASS || *previous == OP_REF) |
else if (*previous == OP_CLASS || *previous == OP_NEGCLASS || |
1027 |
|
*previous == OP_REF) |
1028 |
{ |
{ |
1029 |
if (repeat_min == 0 && repeat_max == -1) |
if (repeat_min == 0 && repeat_max == -1) |
1030 |
*code++ = OP_CRSTAR + repeat_type; |
*code++ = OP_CRSTAR + repeat_type; |
2050 |
case OP_MINUPTO: |
case OP_MINUPTO: |
2051 |
if (isprint(c = code[3])) printf(" %c{", c); |
if (isprint(c = code[3])) printf(" %c{", c); |
2052 |
else printf(" \\x%02x{", c); |
else printf(" \\x%02x{", c); |
2053 |
if (*code != OP_EXACT) printf(","); |
if (*code != OP_EXACT) printf("0,"); |
2054 |
printf("%d}", (code[1] << 8) + code[2]); |
printf("%d}", (code[1] << 8) + code[2]); |
2055 |
if (*code == OP_MINUPTO) printf("?"); |
if (*code == OP_MINUPTO) printf("?"); |
2056 |
code += 3; |
code += 3; |
2099 |
goto CLASS_REF_REPEAT; |
goto CLASS_REF_REPEAT; |
2100 |
|
|
2101 |
case OP_CLASS: |
case OP_CLASS: |
2102 |
|
case OP_NEGCLASS: |
2103 |
{ |
{ |
2104 |
int i, min, max; |
int i, min, max; |
2105 |
|
|
2106 |
code++; |
if (*code++ == OP_CLASS) printf(" ["); |
2107 |
printf(" ["); |
else printf(" ^["); |
2108 |
|
|
2109 |
for (i = 0; i < 256; i++) |
for (i = 0; i < 256; i++) |
2110 |
{ |
{ |
2724 |
item to see if there is repeat information following. Then obey similar |
item to see if there is repeat information following. Then obey similar |
2725 |
code to character type repeats - written out again for speed. If caseless |
code to character type repeats - written out again for speed. If caseless |
2726 |
matching was set at runtime but not at compile time, we have to check both |
matching was set at runtime but not at compile time, we have to check both |
2727 |
versions of a character. */ |
versions of a character, and we have to behave differently for positive and |
2728 |
|
negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are |
2729 |
|
treated differently. */ |
2730 |
|
|
2731 |
case OP_CLASS: |
case OP_CLASS: |
2732 |
|
case OP_NEGCLASS: |
2733 |
{ |
{ |
2734 |
|
BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless; |
2735 |
const uschar *data = ecode + 1; /* Save for matching */ |
const uschar *data = ecode + 1; /* Save for matching */ |
2736 |
ecode += 33; /* Advance past the item */ |
ecode += 33; /* Advance past the item */ |
2737 |
|
|
2760 |
break; |
break; |
2761 |
|
|
2762 |
default: /* No repeat follows */ |
default: /* No repeat follows */ |
2763 |
if (eptr >= md->end_subject) return FALSE; |
min = max = 1; |
2764 |
c = *eptr++; |
break; |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; /* With main loop */ |
|
|
if (md->runtime_caseless) |
|
|
{ |
|
|
c = pcre_fcc[c]; |
|
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; /* With main loop */ |
|
|
} |
|
|
return FALSE; |
|
2765 |
} |
} |
2766 |
|
|
2767 |
/* First, ensure the minimum number of matches are present. */ |
/* First, ensure the minimum number of matches are present. */ |
2770 |
{ |
{ |
2771 |
if (eptr >= md->end_subject) return FALSE; |
if (eptr >= md->end_subject) return FALSE; |
2772 |
c = *eptr++; |
c = *eptr++; |
2773 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
|
2774 |
if (md->runtime_caseless) |
/* Either not runtime caseless, or it was a positive class. For |
2775 |
|
runtime caseless, continue if either case is in the map. */ |
2776 |
|
|
2777 |
|
if (!nasty_case) |
2778 |
{ |
{ |
2779 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2780 |
|
if (md->runtime_caseless) |
2781 |
|
{ |
2782 |
|
c = pcre_fcc[c]; |
2783 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2784 |
|
} |
2785 |
|
} |
2786 |
|
|
2787 |
|
/* Runtime caseless and it was a negative class. Continue only if |
2788 |
|
both cases are in the map. */ |
2789 |
|
|
2790 |
|
else |
2791 |
|
{ |
2792 |
|
if ((data[c/8] & (1 << (c&7))) == 0) return FALSE; |
2793 |
c = pcre_fcc[c]; |
c = pcre_fcc[c]; |
2794 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2795 |
} |
} |
2796 |
|
|
2797 |
return FALSE; |
return FALSE; |
2798 |
} |
} |
2799 |
|
|
2812 |
if (match(eptr, ecode, offset_top, md)) return TRUE; |
if (match(eptr, ecode, offset_top, md)) return TRUE; |
2813 |
if (i >= max || eptr >= md->end_subject) return FALSE; |
if (i >= max || eptr >= md->end_subject) return FALSE; |
2814 |
c = *eptr++; |
c = *eptr++; |
2815 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
|
2816 |
if (md->runtime_caseless) |
/* Either not runtime caseless, or it was a positive class. For |
2817 |
|
runtime caseless, continue if either case is in the map. */ |
2818 |
|
|
2819 |
|
if (!nasty_case) |
2820 |
{ |
{ |
2821 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2822 |
|
if (md->runtime_caseless) |
2823 |
|
{ |
2824 |
|
c = pcre_fcc[c]; |
2825 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2826 |
|
} |
2827 |
|
} |
2828 |
|
|
2829 |
|
/* Runtime caseless and it was a negative class. Continue only if |
2830 |
|
both cases are in the map. */ |
2831 |
|
|
2832 |
|
else |
2833 |
|
{ |
2834 |
|
if ((data[c/8] & (1 << (c&7))) == 0) return FALSE; |
2835 |
c = pcre_fcc[c]; |
c = pcre_fcc[c]; |
2836 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2837 |
} |
} |
2838 |
|
|
2839 |
return FALSE; |
return FALSE; |
2840 |
} |
} |
2841 |
/* Control never gets here */ |
/* Control never gets here */ |
2850 |
{ |
{ |
2851 |
if (eptr >= md->end_subject) break; |
if (eptr >= md->end_subject) break; |
2852 |
c = *eptr; |
c = *eptr; |
2853 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
|
2854 |
if (md->runtime_caseless) |
/* Either not runtime caseless, or it was a positive class. For |
2855 |
|
runtime caseless, continue if either case is in the map. */ |
2856 |
|
|
2857 |
|
if (!nasty_case) |
2858 |
{ |
{ |
2859 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2860 |
|
if (md->runtime_caseless) |
2861 |
|
{ |
2862 |
|
c = pcre_fcc[c]; |
2863 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2864 |
|
} |
2865 |
|
} |
2866 |
|
|
2867 |
|
/* Runtime caseless and it was a negative class. Continue only if |
2868 |
|
both cases are in the map. */ |
2869 |
|
|
2870 |
|
else |
2871 |
|
{ |
2872 |
|
if ((data[c/8] & (1 << (c&7))) == 0) break; |
2873 |
c = pcre_fcc[c]; |
c = pcre_fcc[c]; |
2874 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2875 |
} |
} |
2876 |
|
|
2877 |
break; |
break; |
2878 |
} |
} |
2879 |
|
|
3360 |
since it's needed only for the extension \X option, and with any luck, a good |
since it's needed only for the extension \X option, and with any luck, a good |
3361 |
compiler will spot the tail recursion and compile it efficiently. |
compiler will spot the tail recursion and compile it efficiently. |
3362 |
|
|
3363 |
Arguments: The block containing the match data |
Arguments: |
3364 |
Returns: The return from setjmp() |
eptr pointer in subject |
3365 |
|
ecode position in code |
3366 |
|
offset_top current top pointer |
3367 |
|
md pointer to "static" info for the match |
3368 |
|
|
3369 |
|
Returns: TRUE if matched |
3370 |
*/ |
*/ |
3371 |
|
|
3372 |
static int |
static BOOL |
3373 |
my_setjmp(match_data *match_block) |
match_with_setjmp(const uschar *eptr, const uschar *ecode, int offset_top, |
3374 |
|
match_data *match_block) |
3375 |
{ |
{ |
3376 |
return setjmp(match_block->fail_env); |
return setjmp(match_block->fail_env) == 0 && |
3377 |
|
match(eptr, ecode, offset_top, match_block); |
3378 |
} |
} |
3379 |
|
|
3380 |
|
|
3406 |
pcre_exec(const pcre *external_re, const pcre_extra *external_extra, |
pcre_exec(const pcre *external_re, const pcre_extra *external_extra, |
3407 |
const char *subject, int length, int options, int *offsets, int offsetcount) |
const char *subject, int length, int options, int *offsets, int offsetcount) |
3408 |
{ |
{ |
3409 |
int resetcount; |
int resetcount, ocount; |
|
int ocount = offsetcount; |
|
3410 |
int first_char = -1; |
int first_char = -1; |
3411 |
match_data match_block; |
match_data match_block; |
3412 |
const uschar *start_bits = NULL; |
const uschar *start_bits = NULL; |
3414 |
const uschar *end_subject; |
const uschar *end_subject; |
3415 |
const real_pcre *re = (const real_pcre *)external_re; |
const real_pcre *re = (const real_pcre *)external_re; |
3416 |
const real_pcre_extra *extra = (const real_pcre_extra *)external_extra; |
const real_pcre_extra *extra = (const real_pcre_extra *)external_extra; |
3417 |
|
BOOL using_temporary_offsets = FALSE; |
3418 |
BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0; |
BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0; |
3419 |
BOOL startline = (re->options & PCRE_STARTLINE) != 0; |
BOOL startline = (re->options & PCRE_STARTLINE) != 0; |
3420 |
|
|
3443 |
|
|
3444 |
/* If the expression has got more back references than the offsets supplied can |
/* If the expression has got more back references than the offsets supplied can |
3445 |
hold, we get a temporary bit of working store to use during the matching. |
hold, we get a temporary bit of working store to use during the matching. |
3446 |
Otherwise, we can use the vector supplied, rounding down the size of it to a |
Otherwise, we can use the vector supplied, rounding down its size to a multiple |
3447 |
multiple of 2. */ |
of 2. */ |
3448 |
|
|
3449 |
ocount &= (-2); |
ocount = offsetcount & (-2); |
3450 |
if (re->top_backref > 0 && re->top_backref + 1 >= ocount/2) |
if (re->top_backref > 0 && re->top_backref >= ocount/2) |
3451 |
{ |
{ |
3452 |
ocount = re->top_backref * 2 + 2; |
ocount = re->top_backref * 2 + 2; |
3453 |
match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int)); |
match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int)); |
3454 |
if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY; |
if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY; |
3455 |
|
using_temporary_offsets = TRUE; |
3456 |
DPRINTF(("Got memory to hold back references\n")); |
DPRINTF(("Got memory to hold back references\n")); |
3457 |
} |
} |
3458 |
else match_block.offset_vector = offsets; |
else match_block.offset_vector = offsets; |
3567 |
it unless PCRE_EXTRA is set, since only in that case is the "cut" operation |
it unless PCRE_EXTRA is set, since only in that case is the "cut" operation |
3568 |
enabled. */ |
enabled. */ |
3569 |
|
|
3570 |
if (((re->options & PCRE_EXTRA) != 0 && my_setjmp(&match_block) != 0) || |
if ((re->options & PCRE_EXTRA) != 0) |
3571 |
!match(start_match, re->code, 2, &match_block)) |
{ |
3572 |
continue; |
if (!match_with_setjmp(start_match, re->code, 2, &match_block)) |
3573 |
|
continue; |
3574 |
|
} |
3575 |
|
else if (!match(start_match, re->code, 2, &match_block)) continue; |
3576 |
|
|
3577 |
/* Copy the offset information from temporary store if necessary */ |
/* Copy the offset information from temporary store if necessary */ |
3578 |
|
|
3579 |
if (ocount != offsetcount) |
if (using_temporary_offsets) |
3580 |
{ |
{ |
3581 |
if (offsetcount >= 4) |
if (offsetcount >= 4) |
3582 |
{ |
{ |
3583 |
memcpy(offsets + 2, match_block.offset_vector + 2, |
memcpy(offsets + 2, match_block.offset_vector + 2, |
3584 |
(offsetcount - 2) * sizeof(int)); |
(offsetcount - 2) * sizeof(int)); |
3585 |
DPRINTF(("Copied offsets; freeing temporary memory\n")); |
DPRINTF(("Copied offsets from temporary memory\n")); |
3586 |
} |
} |
3587 |
if (match_block.end_offset_top > offsetcount) |
if (match_block.end_offset_top > offsetcount) |
3588 |
match_block.offset_overflow = TRUE; |
match_block.offset_overflow = TRUE; |
3606 |
match_block.errorcode == PCRE_ERROR_NOMATCH && |
match_block.errorcode == PCRE_ERROR_NOMATCH && |
3607 |
start_match++ < end_subject); |
start_match++ < end_subject); |
3608 |
|
|
3609 |
|
if (using_temporary_offsets) |
3610 |
|
{ |
3611 |
|
DPRINTF(("Freeing temporary memory\n")); |
3612 |
|
(pcre_free)(match_block.offset_vector); |
3613 |
|
} |
3614 |
|
|
3615 |
DPRINTF((">>>> returning %d\n", match_block.errorcode)); |
DPRINTF((">>>> returning %d\n", match_block.errorcode)); |
3616 |
|
|
3617 |
return match_block.errorcode; |
return match_block.errorcode; |