/[pcre]/code/tags/pcre-2.03/study.c
ViewVC logotype

Diff of /code/tags/pcre-2.03/study.c

Parent Directory | Revision Log | View Patch

revision 13 by nigel, Sat Feb 24 21:38:21 2007 UTC revision 29 by nigel, Sat Feb 24 21:38:53 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1997 University of Cambridge             Copyright (c) 1997-1999 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 25  restrictions: Line 25  restrictions:
25    
26  3. Altered versions must be plainly marked as such, and must not be  3. Altered versions must be plainly marked as such, and must not be
27     misrepresented as being the original software.     misrepresented as being the original software.
28    
29    4. If PCRE is embedded in any software that is released under the GNU
30       General Purpose Licence (GPL), then the terms of that licence shall
31       supersede any condition above with which it is incompatible.
32  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
33  */  */
34    
# Line 37  the external pcre header. */ Line 41  the external pcre header. */
41    
42    
43  /*************************************************  /*************************************************
44    *      Set a bit and maybe its alternate case    *
45    *************************************************/
46    
47    /* Given a character, set its bit in the table, and also the bit for the other
48    version of a letter if we are caseless.
49    
50    Arguments:
51      start_bits    points to the bit map
52      c             is the character
53      caseless      the caseless flag
54      cd            the block with char table pointers
55    
56    Returns:        nothing
57    */
58    
59    static void
60    set_bit(uschar *start_bits, int c, BOOL caseless, compile_data *cd)
61    {
62    start_bits[c/8] |= (1 << (c&7));
63    if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
64      start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));
65    }
66    
67    
68    
69    /*************************************************
70  *          Create bitmap of starting chars       *  *          Create bitmap of starting chars       *
71  *************************************************/  *************************************************/
72    
# Line 47  goes by, we may be able to get more clev Line 77  goes by, we may be able to get more clev
77  Arguments:  Arguments:
78    code         points to an expression    code         points to an expression
79    start_bits   points to a 32-byte table, initialized to 0    start_bits   points to a 32-byte table, initialized to 0
80      caseless     the current state of the caseless flag
81      cd           the block with char table pointers
82    
83  Returns:       TRUE if table built, FALSE otherwise  Returns:       TRUE if table built, FALSE otherwise
84  */  */
85    
86  static BOOL  static BOOL
87  set_start_bits(const uschar *code, uschar *start_bits)  set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
88      compile_data *cd)
89  {  {
90  register int c;  register int c;
91    
92    /* This next statement and the later reference to dummy are here in order to
93    trick the optimizer of the IBM C compiler for OS/2 into generating correct
94    code. Apparently IBM isn't going to fix the problem, and we would rather not
95    disable optimization (in this module it actually makes a big difference, and
96    the pcre module can use all the optimization it can get). */
97    
98    volatile int dummy;
99    
100  do  do
101    {    {
102    const uschar *tcode = code + 3;    const uschar *tcode = code + 3;
# Line 65  do Line 106  do
106      {      {
107      try_next = FALSE;      try_next = FALSE;
108    
109        /* If a branch starts with a bracket or a positive lookahead assertion,
110        recurse to set bits from within them. That's all for this branch. */
111    
112      if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)      if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)
113        {        {
114        if (!set_start_bits(tcode, start_bits)) return FALSE;        if (!set_start_bits(tcode, start_bits, caseless, cd))
115            return FALSE;
116        }        }
117    
118      else switch(*tcode)      else switch(*tcode)
# Line 75  do Line 120  do
120        default:        default:
121        return FALSE;        return FALSE;
122    
123          /* Skip over lookbehind and negative lookahead assertions */
124    
125          case OP_ASSERT_NOT:
126          case OP_ASSERTBACK:
127          case OP_ASSERTBACK_NOT:
128          try_next = TRUE;
129          do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
130          tcode += 3;
131          break;
132    
133          /* Skip over an option setting, changing the caseless flag */
134    
135          case OP_OPT:
136          caseless = (tcode[1] & PCRE_CASELESS) != 0;
137          tcode += 2;
138          try_next = TRUE;
139          break;
140    
141        /* BRAZERO does the bracket, but carries on. */        /* BRAZERO does the bracket, but carries on. */
142    
143        case OP_BRAZERO:        case OP_BRAZERO:
144        case OP_BRAMINZERO:        case OP_BRAMINZERO:
145        if (!set_start_bits(++tcode, start_bits)) return FALSE;        if (!set_start_bits(++tcode, start_bits, caseless, cd))
146            return FALSE;
147          dummy = 1;
148        do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);        do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
149        tcode += 3;        tcode += 3;
150        try_next = TRUE;        try_next = TRUE;
# Line 91  do Line 156  do
156        case OP_MINSTAR:        case OP_MINSTAR:
157        case OP_QUERY:        case OP_QUERY:
158        case OP_MINQUERY:        case OP_MINQUERY:
159        start_bits[tcode[1]/8] |= (1 << (tcode[1]&7));        set_bit(start_bits, tcode[1], caseless, cd);
160        tcode += 2;        tcode += 2;
161        try_next = TRUE;        try_next = TRUE;
162        break;        break;
# Line 100  do Line 165  do
165    
166        case OP_UPTO:        case OP_UPTO:
167        case OP_MINUPTO:        case OP_MINUPTO:
168        start_bits[tcode[3]/8] |= (1 << (tcode[3]&7));        set_bit(start_bits, tcode[3], caseless, cd);
169        tcode += 4;        tcode += 4;
170        try_next = TRUE;        try_next = TRUE;
171        break;        break;
# Line 115  do Line 180  do
180    
181        case OP_PLUS:        case OP_PLUS:
182        case OP_MINPLUS:        case OP_MINPLUS:
183        start_bits[tcode[1]/8] |= (1 << (tcode[1]&7));        set_bit(start_bits, tcode[1], caseless, cd);
184        break;        break;
185    
186        /* Single character type sets the bits and stops */        /* Single character type sets the bits and stops */
187    
188        case OP_NOT_DIGIT:        case OP_NOT_DIGIT:
189        for (c = 0; c < 32; c++) start_bits[c] |= ~pcre_cbits[c+cbit_digit];        for (c = 0; c < 32; c++)
190            start_bits[c] |= ~cd->cbits[c+cbit_digit];
191        break;        break;
192    
193        case OP_DIGIT:        case OP_DIGIT:
194        for (c = 0; c < 32; c++) start_bits[c] |= pcre_cbits[c+cbit_digit];        for (c = 0; c < 32; c++)
195            start_bits[c] |= cd->cbits[c+cbit_digit];
196        break;        break;
197    
198        case OP_NOT_WHITESPACE:        case OP_NOT_WHITESPACE:
199        for (c = 0; c < 32; c++) start_bits[c] |= ~pcre_cbits[c+cbit_space];        for (c = 0; c < 32; c++)
200            start_bits[c] |= ~cd->cbits[c+cbit_space];
201        break;        break;
202    
203        case OP_WHITESPACE:        case OP_WHITESPACE:
204        for (c = 0; c < 32; c++) start_bits[c] |= pcre_cbits[c+cbit_space];        for (c = 0; c < 32; c++)
205            start_bits[c] |= cd->cbits[c+cbit_space];
206        break;        break;
207    
208        case OP_NOT_WORDCHAR:        case OP_NOT_WORDCHAR:
209        for (c = 0; c < 32; c++)        for (c = 0; c < 32; c++)
210          start_bits[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]);          start_bits[c] |= ~(cd->cbits[c] | cd->cbits[c+cbit_word]);
211        break;        break;
212    
213        case OP_WORDCHAR:        case OP_WORDCHAR:
214        for (c = 0; c < 32; c++)        for (c = 0; c < 32; c++)
215          start_bits[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]);          start_bits[c] |= (cd->cbits[c] | cd->cbits[c+cbit_word]);
216        break;        break;
217    
218        /* One or more character type fudges the pointer and restarts, knowing        /* One or more character type fudges the pointer and restarts, knowing
# Line 174  do Line 243  do
243        switch(tcode[1])        switch(tcode[1])
244          {          {
245          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
246          for (c = 0; c < 32; c++) start_bits[c] |= ~pcre_cbits[c+cbit_digit];          for (c = 0; c < 32; c++)
247              start_bits[c] |= ~cd->cbits[c+cbit_digit];
248          break;          break;
249    
250          case OP_DIGIT:          case OP_DIGIT:
251          for (c = 0; c < 32; c++) start_bits[c] |= pcre_cbits[c+cbit_digit];          for (c = 0; c < 32; c++)
252              start_bits[c] |= cd->cbits[c+cbit_digit];
253          break;          break;
254    
255          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
256          for (c = 0; c < 32; c++) start_bits[c] |= ~pcre_cbits[c+cbit_space];          for (c = 0; c < 32; c++)
257              start_bits[c] |= ~cd->cbits[c+cbit_space];
258          break;          break;
259    
260          case OP_WHITESPACE:          case OP_WHITESPACE:
261          for (c = 0; c < 32; c++) start_bits[c] |= pcre_cbits[c+cbit_space];          for (c = 0; c < 32; c++)
262              start_bits[c] |= cd->cbits[c+cbit_space];
263          break;          break;
264    
265          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
266          for (c = 0; c < 32; c++)          for (c = 0; c < 32; c++)
267            start_bits[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]);            start_bits[c] |= ~(cd->cbits[c] | cd->cbits[c+cbit_word]);
268          break;          break;
269    
270          case OP_WORDCHAR:          case OP_WORDCHAR:
271          for (c = 0; c < 32; c++)          for (c = 0; c < 32; c++)
272            start_bits[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]);            start_bits[c] |= (cd->cbits[c] | cd->cbits[c+cbit_word]);
273          break;          break;
274          }          }
275    
# Line 208  do Line 281  do
281        according to the repeat count. */        according to the repeat count. */
282    
283        case OP_CLASS:        case OP_CLASS:
       case OP_NEGCLASS:  
284          {          {
285          tcode++;          tcode++;
286          for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];          for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
# Line 267  Returns:    pointer to a pcre_extra bloc Line 339  Returns:    pointer to a pcre_extra bloc
339  pcre_extra *  pcre_extra *
340  pcre_study(const pcre *external_re, int options, const char **errorptr)  pcre_study(const pcre *external_re, int options, const char **errorptr)
341  {  {
 BOOL caseless;  
342  uschar start_bits[32];  uschar start_bits[32];
343  real_pcre_extra *extra;  real_pcre_extra *extra;
344  const real_pcre *re = (const real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
345    compile_data compile_block;
346    
347  *errorptr = NULL;  *errorptr = NULL;
348    
# Line 286  if ((options & ~PUBLIC_STUDY_OPTIONS) != Line 358  if ((options & ~PUBLIC_STUDY_OPTIONS) !=
358    return NULL;    return NULL;
359    }    }
360    
 /* Caseless can either be from the compiled regex or from options. */  
   
 caseless = ((re->options | options) & PCRE_CASELESS) != 0;  
   
361  /* For an anchored pattern, or an unanchored pattern that has a first char, or a  /* For an anchored pattern, or an unanchored pattern that has a first char, or a
362  multiline pattern that matches only at "line starts", no further processing at  multiline pattern that matches only at "line starts", no further processing at
363  present. */  present. */
# Line 297  present. */ Line 365  present. */
365  if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)  if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
366    return NULL;    return NULL;
367    
368  /* See if we can find a fixed set of initial characters for the pattern. */  /* Set the character tables in the block which is passed around */
369    
370  memset(start_bits, 0, 32 * sizeof(uschar));  compile_block.lcc = re->tables + lcc_offset;
371  if (!set_start_bits(re->code, start_bits)) return NULL;  compile_block.fcc = re->tables + fcc_offset;
372    compile_block.cbits = re->tables + cbits_offset;
373    compile_block.ctypes = re->tables + ctypes_offset;
374    
375  /* If this studying is caseless, scan the created bit map and duplicate the  /* See if we can find a fixed set of initial characters for the pattern. */
 bits for any letters. */  
376    
377  if (caseless)  memset(start_bits, 0, 32 * sizeof(uschar));
378    {  if (!set_start_bits(re->code, start_bits, (re->options & PCRE_CASELESS) != 0,
379    register int c;    &compile_block)) return NULL;
   for (c = 0; c < 256; c++)  
     {  
     if ((start_bits[c/8] & (1 << (c&7))) != 0 &&  
         (pcre_ctypes[c] & ctype_letter) != 0)  
       {  
       int d = pcre_fcc[c];  
       start_bits[d/8] |= (1 << (d&7));  
       }  
     }  
   }  
380    
381  /* Get an "extra" block and put the information therein. */  /* Get an "extra" block and put the information therein. */
382    
# Line 329  if (extra == NULL) Line 388  if (extra == NULL)
388    return NULL;    return NULL;
389    }    }
390    
391  extra->options = PCRE_STUDY_MAPPED | (caseless? PCRE_STUDY_CASELESS : 0);  extra->options = PCRE_STUDY_MAPPED;
392  memcpy(extra->start_bits, start_bits, sizeof(start_bits));  memcpy(extra->start_bits, start_bits, sizeof(start_bits));
393    
394  return (pcre_extra *)extra;  return (pcre_extra *)extra;

Legend:
Removed from v.13  
changed lines
  Added in v.29

  ViewVC Help
Powered by ViewVC 1.1.5