--- code/trunk/study.c 2007/02/24 21:38:41 23 +++ code/tags/pcre-2.07/study.c 2007/02/24 21:39:11 38 @@ -9,7 +9,7 @@ Written by: Philip Hazel - Copyright (c) 1998 University of Cambridge + Copyright (c) 1997-1999 University of Cambridge ----------------------------------------------------------------------------- Permission is granted to anyone to use this software for any purpose on any @@ -25,6 +25,10 @@ 3. Altered versions must be plainly marked as such, and must not be misrepresented as being the original software. + +4. If PCRE is embedded in any software that is released under the GNU + General Purpose Licence (GPL), then the terms of that licence shall + supersede any condition above with which it is incompatible. ----------------------------------------------------------------------------- */ @@ -47,16 +51,17 @@ start_bits points to the bit map c is the character caseless the caseless flag + cd the block with char table pointers Returns: nothing */ static void -set_bit(uschar *start_bits, int c, BOOL caseless) +set_bit(uschar *start_bits, int c, BOOL caseless, compile_data *cd) { start_bits[c/8] |= (1 << (c&7)); -if (caseless && (pcre_ctypes[c] & ctype_letter) != 0) - start_bits[pcre_fcc[c]/8] |= (1 << (pcre_fcc[c]&7)); +if (caseless && (cd->ctypes[c] & ctype_letter) != 0) + start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7)); } @@ -73,15 +78,25 @@ code points to an expression start_bits points to a 32-byte table, initialized to 0 caseless the current state of the caseless flag + cd the block with char table pointers Returns: TRUE if table built, FALSE otherwise */ static BOOL -set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless) +set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless, + compile_data *cd) { register int c; +/* This next statement and the later reference to dummy are here in order to +trick the optimizer of the IBM C compiler for OS/2 into generating correct +code. Apparently IBM isn't going to fix the problem, and we would rather not +disable optimization (in this module it actually makes a big difference, and +the pcre module can use all the optimization it can get). */ + +volatile int dummy; + do { const uschar *tcode = code + 3; @@ -96,7 +111,8 @@ if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT) { - if (!set_start_bits(tcode, start_bits, caseless)) return FALSE; + if (!set_start_bits(tcode, start_bits, caseless, cd)) + return FALSE; } else switch(*tcode) @@ -126,7 +142,9 @@ case OP_BRAZERO: case OP_BRAMINZERO: - if (!set_start_bits(++tcode, start_bits, caseless)) return FALSE; + if (!set_start_bits(++tcode, start_bits, caseless, cd)) + return FALSE; + dummy = 1; do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT); tcode += 3; try_next = TRUE; @@ -138,7 +156,7 @@ case OP_MINSTAR: case OP_QUERY: case OP_MINQUERY: - set_bit(start_bits, tcode[1], caseless); + set_bit(start_bits, tcode[1], caseless, cd); tcode += 2; try_next = TRUE; break; @@ -147,7 +165,7 @@ case OP_UPTO: case OP_MINUPTO: - set_bit(start_bits, tcode[3], caseless); + set_bit(start_bits, tcode[3], caseless, cd); tcode += 4; try_next = TRUE; break; @@ -162,35 +180,39 @@ case OP_PLUS: case OP_MINPLUS: - set_bit(start_bits, tcode[1], caseless); + set_bit(start_bits, tcode[1], caseless, cd); break; /* Single character type sets the bits and stops */ case OP_NOT_DIGIT: - for (c = 0; c < 32; c++) start_bits[c] |= ~pcre_cbits[c+cbit_digit]; + for (c = 0; c < 32; c++) + start_bits[c] |= ~cd->cbits[c+cbit_digit]; break; case OP_DIGIT: - for (c = 0; c < 32; c++) start_bits[c] |= pcre_cbits[c+cbit_digit]; + for (c = 0; c < 32; c++) + start_bits[c] |= cd->cbits[c+cbit_digit]; break; case OP_NOT_WHITESPACE: - for (c = 0; c < 32; c++) start_bits[c] |= ~pcre_cbits[c+cbit_space]; + for (c = 0; c < 32; c++) + start_bits[c] |= ~cd->cbits[c+cbit_space]; break; case OP_WHITESPACE: - for (c = 0; c < 32; c++) start_bits[c] |= pcre_cbits[c+cbit_space]; + for (c = 0; c < 32; c++) + start_bits[c] |= cd->cbits[c+cbit_space]; break; case OP_NOT_WORDCHAR: for (c = 0; c < 32; c++) - start_bits[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]); + start_bits[c] |= ~(cd->cbits[c] | cd->cbits[c+cbit_word]); break; case OP_WORDCHAR: for (c = 0; c < 32; c++) - start_bits[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]); + start_bits[c] |= (cd->cbits[c] | cd->cbits[c+cbit_word]); break; /* One or more character type fudges the pointer and restarts, knowing @@ -221,29 +243,33 @@ switch(tcode[1]) { case OP_NOT_DIGIT: - for (c = 0; c < 32; c++) start_bits[c] |= ~pcre_cbits[c+cbit_digit]; + for (c = 0; c < 32; c++) + start_bits[c] |= ~cd->cbits[c+cbit_digit]; break; case OP_DIGIT: - for (c = 0; c < 32; c++) start_bits[c] |= pcre_cbits[c+cbit_digit]; + for (c = 0; c < 32; c++) + start_bits[c] |= cd->cbits[c+cbit_digit]; break; case OP_NOT_WHITESPACE: - for (c = 0; c < 32; c++) start_bits[c] |= ~pcre_cbits[c+cbit_space]; + for (c = 0; c < 32; c++) + start_bits[c] |= ~cd->cbits[c+cbit_space]; break; case OP_WHITESPACE: - for (c = 0; c < 32; c++) start_bits[c] |= pcre_cbits[c+cbit_space]; + for (c = 0; c < 32; c++) + start_bits[c] |= cd->cbits[c+cbit_space]; break; case OP_NOT_WORDCHAR: for (c = 0; c < 32; c++) - start_bits[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]); + start_bits[c] |= ~(cd->cbits[c] | cd->cbits[c+cbit_word]); break; case OP_WORDCHAR: for (c = 0; c < 32; c++) - start_bits[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]); + start_bits[c] |= (cd->cbits[c] | cd->cbits[c+cbit_word]); break; } @@ -316,6 +342,7 @@ uschar start_bits[32]; real_pcre_extra *extra; const real_pcre *re = (const real_pcre *)external_re; +compile_data compile_block; *errorptr = NULL; @@ -338,11 +365,18 @@ if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0) return NULL; +/* Set the character tables in the block which is passed around */ + +compile_block.lcc = re->tables + lcc_offset; +compile_block.fcc = re->tables + fcc_offset; +compile_block.cbits = re->tables + cbits_offset; +compile_block.ctypes = re->tables + ctypes_offset; + /* See if we can find a fixed set of initial characters for the pattern. */ memset(start_bits, 0, 32 * sizeof(uschar)); -if (!set_start_bits(re->code, start_bits, (re->options & PCRE_CASELESS) != 0)) - return NULL; +if (!set_start_bits(re->code, start_bits, (re->options & PCRE_CASELESS) != 0, + &compile_block)) return NULL; /* Get an "extra" block and put the information therein. */