9 |
|
|
10 |
Written by: Philip Hazel <ph10@cam.ac.uk> |
Written by: Philip Hazel <ph10@cam.ac.uk> |
11 |
|
|
12 |
Copyright (c) 1997 University of Cambridge |
Copyright (c) 1998 University of Cambridge |
13 |
|
|
14 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
15 |
Permission is granted to anyone to use this software for any purpose on any |
Permission is granted to anyone to use this software for any purpose on any |
49 |
#include "internal.h" |
#include "internal.h" |
50 |
|
|
51 |
|
|
52 |
|
/* Allow compilation as C++ source code, should anybody want to do that. */ |
53 |
|
|
54 |
|
#ifdef __cplusplus |
55 |
|
#define class pcre_class |
56 |
|
#endif |
57 |
|
|
58 |
|
|
59 |
/* Min and max values for the common repeats; for the maxima, 0 => infinity */ |
/* Min and max values for the common repeats; for the maxima, 0 => infinity */ |
60 |
|
|
61 |
static char rep_min[] = { 0, 0, 1, 1, 0, 0 }; |
static const char rep_min[] = { 0, 0, 1, 1, 0, 0 }; |
62 |
static char rep_max[] = { 0, 0, 0, 0, 1, 1 }; |
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 }; |
63 |
|
|
64 |
/* Text forms of OP_ values and things, for debugging */ |
/* Text forms of OP_ values and things, for debugging (not all used) */ |
65 |
|
|
66 |
#ifdef DEBUG |
#ifdef DEBUG |
67 |
static const char *OP_names[] = { |
static const char *OP_names[] = { |
72 |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
73 |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
74 |
"*", "*?", "+", "+?", "?", "??", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", |
75 |
"class", "Ref", |
"class", "negclass", "Ref", |
76 |
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once", |
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once", |
77 |
"Brazero", "Braminzero", "Bra" |
"Brazero", "Braminzero", "Bra" |
78 |
}; |
}; |
83 |
on. Zero means further processing is needed (for things like \x), or the escape |
on. Zero means further processing is needed (for things like \x), or the escape |
84 |
is invalid. */ |
is invalid. */ |
85 |
|
|
86 |
static short int escapes[] = { |
static const short int escapes[] = { |
87 |
0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */ |
0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */ |
88 |
0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */ |
0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */ |
89 |
'@', -ESC_A, -ESC_B, 0, -ESC_D, 0, 0, 0, /* @ - G */ |
'@', -ESC_A, -ESC_B, 0, -ESC_D, 0, 0, 0, /* @ - G */ |
98 |
|
|
99 |
/* Definition to allow mutual recursion */ |
/* Definition to allow mutual recursion */ |
100 |
|
|
101 |
static BOOL compile_regex(int, int *, uschar **, const uschar **, const char **); |
static BOOL |
102 |
|
compile_regex(int, int *, uschar **, const uschar **, const char **); |
103 |
|
|
104 |
/* Structure for passing "static" information around between the functions |
/* Structure for passing "static" information around between the functions |
105 |
doing the matching, so that they are thread-safe. */ |
doing the matching, so that they are thread-safe. */ |
264 |
case OP_KETRMIN: |
case OP_KETRMIN: |
265 |
return TRUE; |
return TRUE; |
266 |
|
|
267 |
|
/* Skip over entire bracket groups with zero lower bound */ |
268 |
|
|
269 |
|
case OP_BRAZERO: |
270 |
|
case OP_BRAMINZERO: |
271 |
|
cc++; |
272 |
|
/* Fall through */ |
273 |
|
|
274 |
/* Skip over assertive subpatterns */ |
/* Skip over assertive subpatterns */ |
275 |
|
|
276 |
case OP_ASSERT: |
case OP_ASSERT: |
285 |
case OP_EOD: |
case OP_EOD: |
286 |
case OP_CIRC: |
case OP_CIRC: |
287 |
case OP_DOLL: |
case OP_DOLL: |
|
case OP_BRAZERO: |
|
|
case OP_BRAMINZERO: |
|
288 |
case OP_NOT_WORD_BOUNDARY: |
case OP_NOT_WORD_BOUNDARY: |
289 |
case OP_WORD_BOUNDARY: |
case OP_WORD_BOUNDARY: |
290 |
cc++; |
cc++; |
319 |
/* Check a class or a back reference for a zero minimum */ |
/* Check a class or a back reference for a zero minimum */ |
320 |
|
|
321 |
case OP_CLASS: |
case OP_CLASS: |
322 |
|
case OP_NEGCLASS: |
323 |
case OP_REF: |
case OP_REF: |
324 |
cc += (*cc == OP_REF)? 2 : 33; |
cc += (*cc == OP_REF)? 2 : 33; |
325 |
|
|
684 |
|
|
685 |
case '[': |
case '[': |
686 |
previous = code; |
previous = code; |
|
*code++ = OP_CLASS; |
|
687 |
|
|
688 |
/* If the first character is '^', set the negation flag */ |
/* If the first character is '^', set the negation flag, and use a |
689 |
|
different opcode. This only matters if caseless matching is specified at |
690 |
|
runtime. */ |
691 |
|
|
692 |
if ((c = *(++ptr)) == '^') |
if ((c = *(++ptr)) == '^') |
693 |
{ |
{ |
694 |
negate_class = TRUE; |
negate_class = TRUE; |
695 |
|
*code++ = OP_NEGCLASS; |
696 |
c = *(++ptr); |
c = *(++ptr); |
697 |
} |
} |
698 |
else negate_class = FALSE; |
else |
699 |
|
{ |
700 |
|
negate_class = FALSE; |
701 |
|
*code++ = OP_CLASS; |
702 |
|
} |
703 |
|
|
704 |
/* Keep a count of chars so that we can optimize the case of just a single |
/* Keep a count of chars so that we can optimize the case of just a single |
705 |
character. */ |
character. */ |
1035 |
/* If previous was a character class or a back reference, we put the repeat |
/* If previous was a character class or a back reference, we put the repeat |
1036 |
stuff after it. */ |
stuff after it. */ |
1037 |
|
|
1038 |
else if (*previous == OP_CLASS || *previous == OP_REF) |
else if (*previous == OP_CLASS || *previous == OP_NEGCLASS || |
1039 |
|
*previous == OP_REF) |
1040 |
{ |
{ |
1041 |
if (repeat_min == 0 && repeat_max == -1) |
if (repeat_min == 0 && repeat_max == -1) |
1042 |
*code++ = OP_CRSTAR + repeat_type; |
*code++ = OP_CRSTAR + repeat_type; |
1308 |
the next state. */ |
the next state. */ |
1309 |
|
|
1310 |
previous[1] = length; |
previous[1] = length; |
1311 |
ptr--; |
if (length < 255) ptr--; |
1312 |
break; |
break; |
1313 |
} |
} |
1314 |
} /* end of big loop */ |
} /* end of big loop */ |
2111 |
goto CLASS_REF_REPEAT; |
goto CLASS_REF_REPEAT; |
2112 |
|
|
2113 |
case OP_CLASS: |
case OP_CLASS: |
2114 |
|
case OP_NEGCLASS: |
2115 |
{ |
{ |
2116 |
int i, min, max; |
int i, min, max; |
2117 |
|
|
2118 |
code++; |
if (*code++ == OP_CLASS) printf(" ["); |
2119 |
printf(" ["); |
else printf(" ^["); |
2120 |
|
|
2121 |
for (i = 0; i < 256; i++) |
for (i = 0; i < 256; i++) |
2122 |
{ |
{ |
2736 |
item to see if there is repeat information following. Then obey similar |
item to see if there is repeat information following. Then obey similar |
2737 |
code to character type repeats - written out again for speed. If caseless |
code to character type repeats - written out again for speed. If caseless |
2738 |
matching was set at runtime but not at compile time, we have to check both |
matching was set at runtime but not at compile time, we have to check both |
2739 |
versions of a character. */ |
versions of a character, and we have to behave differently for positive and |
2740 |
|
negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are |
2741 |
|
treated differently. */ |
2742 |
|
|
2743 |
case OP_CLASS: |
case OP_CLASS: |
2744 |
|
case OP_NEGCLASS: |
2745 |
{ |
{ |
2746 |
|
BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless; |
2747 |
const uschar *data = ecode + 1; /* Save for matching */ |
const uschar *data = ecode + 1; /* Save for matching */ |
2748 |
ecode += 33; /* Advance past the item */ |
ecode += 33; /* Advance past the item */ |
2749 |
|
|
2772 |
break; |
break; |
2773 |
|
|
2774 |
default: /* No repeat follows */ |
default: /* No repeat follows */ |
2775 |
if (eptr >= md->end_subject) return FALSE; |
min = max = 1; |
2776 |
c = *eptr++; |
break; |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; /* With main loop */ |
|
|
if (md->runtime_caseless) |
|
|
{ |
|
|
c = pcre_fcc[c]; |
|
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; /* With main loop */ |
|
|
} |
|
|
return FALSE; |
|
2777 |
} |
} |
2778 |
|
|
2779 |
/* First, ensure the minimum number of matches are present. */ |
/* First, ensure the minimum number of matches are present. */ |
2782 |
{ |
{ |
2783 |
if (eptr >= md->end_subject) return FALSE; |
if (eptr >= md->end_subject) return FALSE; |
2784 |
c = *eptr++; |
c = *eptr++; |
2785 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
|
2786 |
if (md->runtime_caseless) |
/* Either not runtime caseless, or it was a positive class. For |
2787 |
|
runtime caseless, continue if either case is in the map. */ |
2788 |
|
|
2789 |
|
if (!nasty_case) |
2790 |
{ |
{ |
2791 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2792 |
|
if (md->runtime_caseless) |
2793 |
|
{ |
2794 |
|
c = pcre_fcc[c]; |
2795 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2796 |
|
} |
2797 |
|
} |
2798 |
|
|
2799 |
|
/* Runtime caseless and it was a negative class. Continue only if |
2800 |
|
both cases are in the map. */ |
2801 |
|
|
2802 |
|
else |
2803 |
|
{ |
2804 |
|
if ((data[c/8] & (1 << (c&7))) == 0) return FALSE; |
2805 |
c = pcre_fcc[c]; |
c = pcre_fcc[c]; |
2806 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2807 |
} |
} |
2808 |
|
|
2809 |
return FALSE; |
return FALSE; |
2810 |
} |
} |
2811 |
|
|
2824 |
if (match(eptr, ecode, offset_top, md)) return TRUE; |
if (match(eptr, ecode, offset_top, md)) return TRUE; |
2825 |
if (i >= max || eptr >= md->end_subject) return FALSE; |
if (i >= max || eptr >= md->end_subject) return FALSE; |
2826 |
c = *eptr++; |
c = *eptr++; |
2827 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
|
2828 |
if (md->runtime_caseless) |
/* Either not runtime caseless, or it was a positive class. For |
2829 |
|
runtime caseless, continue if either case is in the map. */ |
2830 |
|
|
2831 |
|
if (!nasty_case) |
2832 |
|
{ |
2833 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2834 |
|
if (md->runtime_caseless) |
2835 |
|
{ |
2836 |
|
c = pcre_fcc[c]; |
2837 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2838 |
|
} |
2839 |
|
} |
2840 |
|
|
2841 |
|
/* Runtime caseless and it was a negative class. Continue only if |
2842 |
|
both cases are in the map. */ |
2843 |
|
|
2844 |
|
else |
2845 |
{ |
{ |
2846 |
|
if ((data[c/8] & (1 << (c&7))) == 0) return FALSE; |
2847 |
c = pcre_fcc[c]; |
c = pcre_fcc[c]; |
2848 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2849 |
} |
} |
2850 |
|
|
2851 |
return FALSE; |
return FALSE; |
2852 |
} |
} |
2853 |
/* Control never gets here */ |
/* Control never gets here */ |
2862 |
{ |
{ |
2863 |
if (eptr >= md->end_subject) break; |
if (eptr >= md->end_subject) break; |
2864 |
c = *eptr; |
c = *eptr; |
2865 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
|
2866 |
if (md->runtime_caseless) |
/* Either not runtime caseless, or it was a positive class. For |
2867 |
|
runtime caseless, continue if either case is in the map. */ |
2868 |
|
|
2869 |
|
if (!nasty_case) |
2870 |
{ |
{ |
2871 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2872 |
|
if (md->runtime_caseless) |
2873 |
|
{ |
2874 |
|
c = pcre_fcc[c]; |
2875 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2876 |
|
} |
2877 |
|
} |
2878 |
|
|
2879 |
|
/* Runtime caseless and it was a negative class. Continue only if |
2880 |
|
both cases are in the map. */ |
2881 |
|
|
2882 |
|
else |
2883 |
|
{ |
2884 |
|
if ((data[c/8] & (1 << (c&7))) == 0) break; |
2885 |
c = pcre_fcc[c]; |
c = pcre_fcc[c]; |
2886 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2887 |
} |
} |
2888 |
|
|
2889 |
break; |
break; |
2890 |
} |
} |
2891 |
|
|
3462 |
if (re->top_backref > 0 && re->top_backref >= ocount/2) |
if (re->top_backref > 0 && re->top_backref >= ocount/2) |
3463 |
{ |
{ |
3464 |
ocount = re->top_backref * 2 + 2; |
ocount = re->top_backref * 2 + 2; |
3465 |
match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int)); |
match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int)); |
3466 |
if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY; |
if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY; |
3467 |
using_temporary_offsets = TRUE; |
using_temporary_offsets = TRUE; |
3468 |
DPRINTF(("Got memory to hold back references\n")); |
DPRINTF(("Got memory to hold back references\n")); |