/[pcre]/code/tags/pcre-4.0/study.c
ViewVC logotype

Diff of /code/tags/pcre-4.0/study.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 23 by nigel, Sat Feb 24 21:38:41 2007 UTC revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1998 University of Cambridge             Copyright (c) 1997-2001 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 25  restrictions: Line 25  restrictions:
25    
26  3. Altered versions must be plainly marked as such, and must not be  3. Altered versions must be plainly marked as such, and must not be
27     misrepresented as being the original software.     misrepresented as being the original software.
28    
29    4. If PCRE is embedded in any software that is released under the GNU
30       General Purpose Licence (GPL), then the terms of that licence shall
31       supersede any condition above with which it is incompatible.
32  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
33  */  */
34    
# Line 47  Arguments: Line 51  Arguments:
51    start_bits    points to the bit map    start_bits    points to the bit map
52    c             is the character    c             is the character
53    caseless      the caseless flag    caseless      the caseless flag
54      cd            the block with char table pointers
55    
56  Returns:        nothing  Returns:        nothing
57  */  */
58    
59  static void  static void
60  set_bit(uschar *start_bits, int c, BOOL caseless)  set_bit(uschar *start_bits, int c, BOOL caseless, compile_data *cd)
61  {  {
62  start_bits[c/8] |= (1 << (c&7));  start_bits[c/8] |= (1 << (c&7));
63  if (caseless && (pcre_ctypes[c] & ctype_letter) != 0)  if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
64    start_bits[pcre_fcc[c]/8] |= (1 << (pcre_fcc[c]&7));    start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));
65  }  }
66    
67    
# Line 73  Arguments: Line 78  Arguments:
78    code         points to an expression    code         points to an expression
79    start_bits   points to a 32-byte table, initialized to 0    start_bits   points to a 32-byte table, initialized to 0
80    caseless     the current state of the caseless flag    caseless     the current state of the caseless flag
81      cd           the block with char table pointers
82    
83  Returns:       TRUE if table built, FALSE otherwise  Returns:       TRUE if table built, FALSE otherwise
84  */  */
85    
86  static BOOL  static BOOL
87  set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless)  set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
88      compile_data *cd)
89  {  {
90  register int c;  register int c;
91    
92    /* This next statement and the later reference to dummy are here in order to
93    trick the optimizer of the IBM C compiler for OS/2 into generating correct
94    code. Apparently IBM isn't going to fix the problem, and we would rather not
95    disable optimization (in this module it actually makes a big difference, and
96    the pcre module can use all the optimization it can get). */
97    
98    volatile int dummy;
99    
100  do  do
101    {    {
102    const uschar *tcode = code + 3;    const uschar *tcode = code + 3;
# Line 89  do Line 104  do
104    
105    while (try_next)    while (try_next)
106      {      {
     try_next = FALSE;  
   
107      /* If a branch starts with a bracket or a positive lookahead assertion,      /* If a branch starts with a bracket or a positive lookahead assertion,
108      recurse to set bits from within them. That's all for this branch. */      recurse to set bits from within them. That's all for this branch. */
109    
110      if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)      if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)
111        {        {
112        if (!set_start_bits(tcode, start_bits, caseless)) return FALSE;        if (!set_start_bits(tcode, start_bits, caseless, cd))
113            return FALSE;
114          try_next = FALSE;
115        }        }
116    
117      else switch(*tcode)      else switch(*tcode)
# Line 104  do Line 119  do
119        default:        default:
120        return FALSE;        return FALSE;
121    
122          /* Skip over extended extraction bracket number */
123    
124          case OP_BRANUMBER:
125          tcode += 3;
126          break;
127    
128        /* Skip over lookbehind and negative lookahead assertions */        /* Skip over lookbehind and negative lookahead assertions */
129    
130        case OP_ASSERT_NOT:        case OP_ASSERT_NOT:
131        case OP_ASSERTBACK:        case OP_ASSERTBACK:
132        case OP_ASSERTBACK_NOT:        case OP_ASSERTBACK_NOT:
       try_next = TRUE;  
133        do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);        do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
134        tcode += 3;        tcode += 3;
135        break;        break;
# Line 119  do Line 139  do
139        case OP_OPT:        case OP_OPT:
140        caseless = (tcode[1] & PCRE_CASELESS) != 0;        caseless = (tcode[1] & PCRE_CASELESS) != 0;
141        tcode += 2;        tcode += 2;
       try_next = TRUE;  
142        break;        break;
143    
144        /* BRAZERO does the bracket, but carries on. */        /* BRAZERO does the bracket, but carries on. */
145    
146        case OP_BRAZERO:        case OP_BRAZERO:
147        case OP_BRAMINZERO:        case OP_BRAMINZERO:
148        if (!set_start_bits(++tcode, start_bits, caseless)) return FALSE;        if (!set_start_bits(++tcode, start_bits, caseless, cd))
149            return FALSE;
150          dummy = 1;
151        do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);        do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
152        tcode += 3;        tcode += 3;
       try_next = TRUE;  
153        break;        break;
154    
155        /* Single-char * or ? sets the bit and tries the next item */        /* Single-char * or ? sets the bit and tries the next item */
# Line 138  do Line 158  do
158        case OP_MINSTAR:        case OP_MINSTAR:
159        case OP_QUERY:        case OP_QUERY:
160        case OP_MINQUERY:        case OP_MINQUERY:
161        set_bit(start_bits, tcode[1], caseless);        set_bit(start_bits, tcode[1], caseless, cd);
162        tcode += 2;        tcode += 2;
       try_next = TRUE;  
163        break;        break;
164    
165        /* Single-char upto sets the bit and tries the next */        /* Single-char upto sets the bit and tries the next */
166    
167        case OP_UPTO:        case OP_UPTO:
168        case OP_MINUPTO:        case OP_MINUPTO:
169        set_bit(start_bits, tcode[3], caseless);        set_bit(start_bits, tcode[3], caseless, cd);
170        tcode += 4;        tcode += 4;
       try_next = TRUE;  
171        break;        break;
172    
173        /* At least one single char sets the bit and stops */        /* At least one single char sets the bit and stops */
# Line 162  do Line 180  do
180    
181        case OP_PLUS:        case OP_PLUS:
182        case OP_MINPLUS:        case OP_MINPLUS:
183        set_bit(start_bits, tcode[1], caseless);        set_bit(start_bits, tcode[1], caseless, cd);
184          try_next = FALSE;
185        break;        break;
186    
187        /* Single character type sets the bits and stops */        /* Single character type sets the bits and stops */
188    
189        case OP_NOT_DIGIT:        case OP_NOT_DIGIT:
190        for (c = 0; c < 32; c++) start_bits[c] |= ~pcre_cbits[c+cbit_digit];        for (c = 0; c < 32; c++)
191            start_bits[c] |= ~cd->cbits[c+cbit_digit];
192          try_next = FALSE;
193        break;        break;
194    
195        case OP_DIGIT:        case OP_DIGIT:
196        for (c = 0; c < 32; c++) start_bits[c] |= pcre_cbits[c+cbit_digit];        for (c = 0; c < 32; c++)
197            start_bits[c] |= cd->cbits[c+cbit_digit];
198          try_next = FALSE;
199        break;        break;
200    
201        case OP_NOT_WHITESPACE:        case OP_NOT_WHITESPACE:
202        for (c = 0; c < 32; c++) start_bits[c] |= ~pcre_cbits[c+cbit_space];        for (c = 0; c < 32; c++)
203            start_bits[c] |= ~cd->cbits[c+cbit_space];
204          try_next = FALSE;
205        break;        break;
206    
207        case OP_WHITESPACE:        case OP_WHITESPACE:
208        for (c = 0; c < 32; c++) start_bits[c] |= pcre_cbits[c+cbit_space];        for (c = 0; c < 32; c++)
209            start_bits[c] |= cd->cbits[c+cbit_space];
210          try_next = FALSE;
211        break;        break;
212    
213        case OP_NOT_WORDCHAR:        case OP_NOT_WORDCHAR:
214        for (c = 0; c < 32; c++)        for (c = 0; c < 32; c++)
215          start_bits[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]);          start_bits[c] |= ~cd->cbits[c+cbit_word];
216          try_next = FALSE;
217        break;        break;
218    
219        case OP_WORDCHAR:        case OP_WORDCHAR:
220        for (c = 0; c < 32; c++)        for (c = 0; c < 32; c++)
221          start_bits[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]);          start_bits[c] |= cd->cbits[c+cbit_word];
222          try_next = FALSE;
223        break;        break;
224    
225        /* One or more character type fudges the pointer and restarts, knowing        /* One or more character type fudges the pointer and restarts, knowing
# Line 199  do Line 228  do
228        case OP_TYPEPLUS:        case OP_TYPEPLUS:
229        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
230        tcode++;        tcode++;
       try_next = TRUE;  
231        break;        break;
232    
233        case OP_TYPEEXACT:        case OP_TYPEEXACT:
234        tcode += 3;        tcode += 3;
       try_next = TRUE;  
235        break;        break;
236    
237        /* Zero or more repeats of character types set the bits and then        /* Zero or more repeats of character types set the bits and then
# Line 221  do Line 248  do
248        switch(tcode[1])        switch(tcode[1])
249          {          {
250          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
251          for (c = 0; c < 32; c++) start_bits[c] |= ~pcre_cbits[c+cbit_digit];          for (c = 0; c < 32; c++)
252              start_bits[c] |= ~cd->cbits[c+cbit_digit];
253          break;          break;
254    
255          case OP_DIGIT:          case OP_DIGIT:
256          for (c = 0; c < 32; c++) start_bits[c] |= pcre_cbits[c+cbit_digit];          for (c = 0; c < 32; c++)
257              start_bits[c] |= cd->cbits[c+cbit_digit];
258          break;          break;
259    
260          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
261          for (c = 0; c < 32; c++) start_bits[c] |= ~pcre_cbits[c+cbit_space];          for (c = 0; c < 32; c++)
262              start_bits[c] |= ~cd->cbits[c+cbit_space];
263          break;          break;
264    
265          case OP_WHITESPACE:          case OP_WHITESPACE:
266          for (c = 0; c < 32; c++) start_bits[c] |= pcre_cbits[c+cbit_space];          for (c = 0; c < 32; c++)
267              start_bits[c] |= cd->cbits[c+cbit_space];
268          break;          break;
269    
270          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
271          for (c = 0; c < 32; c++)          for (c = 0; c < 32; c++)
272            start_bits[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]);            start_bits[c] |= ~cd->cbits[c+cbit_word];
273          break;          break;
274    
275          case OP_WORDCHAR:          case OP_WORDCHAR:
276          for (c = 0; c < 32; c++)          for (c = 0; c < 32; c++)
277            start_bits[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]);            start_bits[c] |= cd->cbits[c+cbit_word];
278          break;          break;
279          }          }
280    
281        tcode += 2;        tcode += 2;
       try_next = TRUE;  
282        break;        break;
283    
284        /* Character class: set the bits and either carry on or not,        /* Character class: set the bits and either carry on or not,
# Line 266  do Line 296  do
296            case OP_CRQUERY:            case OP_CRQUERY:
297            case OP_CRMINQUERY:            case OP_CRMINQUERY:
298            tcode++;            tcode++;
           try_next = TRUE;  
299            break;            break;
300    
301            case OP_CRRANGE:            case OP_CRRANGE:
302            case OP_CRMINRANGE:            case OP_CRMINRANGE:
303            if (((tcode[1] << 8) + tcode[2]) == 0)            if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;
304              {              else try_next = FALSE;
305              tcode += 5;            break;
306              try_next = TRUE;  
307              }            default:
308              try_next = FALSE;
309            break;            break;
310            }            }
311          }          }
# Line 316  pcre_study(const pcre *external_re, int Line 346  pcre_study(const pcre *external_re, int
346  uschar start_bits[32];  uschar start_bits[32];
347  real_pcre_extra *extra;  real_pcre_extra *extra;
348  const real_pcre *re = (const real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
349    compile_data compile_block;
350    
351  *errorptr = NULL;  *errorptr = NULL;
352    
# Line 338  present. */ Line 369  present. */
369  if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)  if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
370    return NULL;    return NULL;
371    
372    /* Set the character tables in the block which is passed around */
373    
374    compile_block.lcc = re->tables + lcc_offset;
375    compile_block.fcc = re->tables + fcc_offset;
376    compile_block.cbits = re->tables + cbits_offset;
377    compile_block.ctypes = re->tables + ctypes_offset;
378    
379  /* See if we can find a fixed set of initial characters for the pattern. */  /* See if we can find a fixed set of initial characters for the pattern. */
380    
381  memset(start_bits, 0, 32 * sizeof(uschar));  memset(start_bits, 0, 32 * sizeof(uschar));
382  if (!set_start_bits(re->code, start_bits, (re->options & PCRE_CASELESS) != 0))  if (!set_start_bits(re->code, start_bits, (re->options & PCRE_CASELESS) != 0,
383    return NULL;    &compile_block)) return NULL;
384    
385  /* Get an "extra" block and put the information therein. */  /* Get an "extra" block and put the information therein. */
386    

Legend:
Removed from v.23  
changed lines
  Added in v.53

  ViewVC Help
Powered by ViewVC 1.1.5