--- code/trunk/pcre.c 2007/02/24 21:38:21 13 +++ code/trunk/pcre.c 2007/02/24 21:38:33 19 @@ -9,7 +9,7 @@ Written by: Philip Hazel - Copyright (c) 1997 University of Cambridge + Copyright (c) 1998 University of Cambridge ----------------------------------------------------------------------------- Permission is granted to anyone to use this software for any purpose on any @@ -49,10 +49,17 @@ #include "internal.h" +/* Allow compilation as C++ source code, should anybody want to do that. */ + +#ifdef __cplusplus +#define class pcre_class +#endif + + /* Min and max values for the common repeats; for the maxima, 0 => infinity */ -static char rep_min[] = { 0, 0, 1, 1, 0, 0 }; -static char rep_max[] = { 0, 0, 0, 0, 1, 1 }; +static const char rep_min[] = { 0, 0, 1, 1, 0, 0 }; +static const char rep_max[] = { 0, 0, 0, 0, 1, 1 }; /* Text forms of OP_ values and things, for debugging (not all used) */ @@ -76,7 +83,7 @@ on. Zero means further processing is needed (for things like \x), or the escape is invalid. */ -static short int escapes[] = { +static const short int escapes[] = { 0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */ 0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */ '@', -ESC_A, -ESC_B, 0, -ESC_D, 0, 0, 0, /* @ - G */ @@ -257,6 +264,13 @@ case OP_KETRMIN: return TRUE; + /* Skip over entire bracket groups with zero lower bound */ + + case OP_BRAZERO: + case OP_BRAMINZERO: + cc++; + /* Fall through */ + /* Skip over assertive subpatterns */ case OP_ASSERT: @@ -271,8 +285,6 @@ case OP_EOD: case OP_CIRC: case OP_DOLL: - case OP_BRAZERO: - case OP_BRAMINZERO: case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY: cc++; @@ -611,6 +623,7 @@ int repeat_type, op_type; int repeat_min, repeat_max; int bravalue, length; +int greedy_default, greedy_non_default; register int c; register uschar *code = *codeptr; const uschar *ptr = *ptrptr; @@ -618,6 +631,11 @@ uschar *previous = NULL; uschar class[32]; +/* Set up the default and non-default settings for greediness */ + +greedy_default = ((options & PCRE_UNGREEDY) != 0); +greedy_non_default = greedy_default ^ 1; + /* Switch on next character until the end of the branch */ for (;; ptr++) @@ -895,10 +913,13 @@ goto FAILED; } - /* If the next character is '?' this is a minimizing repeat. Advance to the + /* If the next character is '?' this is a minimizing repeat, by default, + but if PCRE_UNGREEDY is set, it works the other way round. Advance to the next character. */ - if (ptr[1] == '?') { repeat_type = 1; ptr++; } else repeat_type = 0; + if (ptr[1] == '?') + { repeat_type = greedy_non_default; ptr++; } + else repeat_type = greedy_default; /* If the maximum is zero then the minimum must also be zero; Perl allows this case, so we do too - by simply omitting the item altogether. */ @@ -1137,6 +1158,8 @@ case 'm': case 's': case 'x': + case 'U': + case 'X': ptr++; while (*ptr != ')') ptr++; previous = NULL; @@ -1296,7 +1319,7 @@ the next state. */ previous[1] = length; - ptr--; + if (length < 255) ptr--; break; } } /* end of big loop */ @@ -1740,7 +1763,7 @@ ptr += 2; break; } - /* Else fall thourh */ + /* Else fall through */ /* Else loop setting valid options until ) is met. Anything else is an error. */ @@ -1770,6 +1793,16 @@ length -= spaces; /* Already counted spaces */ continue; } + else if (c == 'X') + { + options |= PCRE_EXTRA; + continue; + } + else if (c == 'U') + { + options |= PCRE_UNGREEDY; + continue; + } else if (c == ')') break; *errorptr = ERR12; @@ -1975,14 +2008,15 @@ if (re->options != 0) { - printf("%s%s%s%s%s%s%s\n", + printf("%s%s%s%s%s%s%s%s\n", ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "", ((re->options & PCRE_CASELESS) != 0)? "caseless " : "", ((re->options & PCRE_EXTENDED) != 0)? "extended " : "", ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "", ((re->options & PCRE_DOTALL) != 0)? "dotall " : "", ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "", - ((re->options & PCRE_EXTRA) != 0)? "extra " : ""); + ((re->options & PCRE_EXTRA) != 0)? "extra " : "", + ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : ""); } if ((re->options & PCRE_FIRSTSET) != 0) @@ -3450,7 +3484,7 @@ if (re->top_backref > 0 && re->top_backref >= ocount/2) { ocount = re->top_backref * 2 + 2; - match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int)); + match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int)); if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY; using_temporary_offsets = TRUE; DPRINTF(("Got memory to hold back references\n"));