/[pcre]/code/tags/pcre-2.08a/study.c
ViewVC logotype

Diff of /code/tags/pcre-2.08a/study.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 7 by nigel, Sat Feb 24 21:38:09 2007 UTC revision 25 by nigel, Sat Feb 24 21:38:45 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1997 University of Cambridge             Copyright (c) 1998 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 37  the external pcre header. */ Line 37  the external pcre header. */
37    
38    
39  /*************************************************  /*************************************************
40    *      Set a bit and maybe its alternate case    *
41    *************************************************/
42    
43    /* Given a character, set its bit in the table, and also the bit for the other
44    version of a letter if we are caseless.
45    
46    Arguments:
47      start_bits    points to the bit map
48      c             is the character
49      caseless      the caseless flag
50      cd            the block with char table pointers
51    
52    Returns:        nothing
53    */
54    
55    static void
56    set_bit(uschar *start_bits, int c, BOOL caseless, compile_data *cd)
57    {
58    start_bits[c/8] |= (1 << (c&7));
59    if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
60      start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));
61    }
62    
63    
64    
65    /*************************************************
66  *          Create bitmap of starting chars       *  *          Create bitmap of starting chars       *
67  *************************************************/  *************************************************/
68    
# Line 47  goes by, we may be able to get more clev Line 73  goes by, we may be able to get more clev
73  Arguments:  Arguments:
74    code         points to an expression    code         points to an expression
75    start_bits   points to a 32-byte table, initialized to 0    start_bits   points to a 32-byte table, initialized to 0
76      caseless     the current state of the caseless flag
77      cd           the block with char table pointers
78    
79  Returns:       TRUE if table built, FALSE otherwise  Returns:       TRUE if table built, FALSE otherwise
80  */  */
81    
82  static BOOL  static BOOL
83  set_start_bits(const uschar *code, uschar *start_bits)  set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
84      compile_data *cd)
85  {  {
86  register int c;  register int c;
87    
# Line 65  do Line 94  do
94      {      {
95      try_next = FALSE;      try_next = FALSE;
96    
97        /* If a branch starts with a bracket or a positive lookahead assertion,
98        recurse to set bits from within them. That's all for this branch. */
99    
100      if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)      if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)
101        {        {
102        if (!set_start_bits(tcode, start_bits)) return FALSE;        if (!set_start_bits(tcode, start_bits, caseless, cd))
103            return FALSE;
104        }        }
105    
106      else switch(*tcode)      else switch(*tcode)
# Line 75  do Line 108  do
108        default:        default:
109        return FALSE;        return FALSE;
110    
111          /* Skip over lookbehind and negative lookahead assertions */
112    
113          case OP_ASSERT_NOT:
114          case OP_ASSERTBACK:
115          case OP_ASSERTBACK_NOT:
116          try_next = TRUE;
117          do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
118          tcode += 3;
119          break;
120    
121          /* Skip over an option setting, changing the caseless flag */
122    
123          case OP_OPT:
124          caseless = (tcode[1] & PCRE_CASELESS) != 0;
125          tcode += 2;
126          try_next = TRUE;
127          break;
128    
129        /* BRAZERO does the bracket, but carries on. */        /* BRAZERO does the bracket, but carries on. */
130    
131        case OP_BRAZERO:        case OP_BRAZERO:
132        case OP_BRAMINZERO:        case OP_BRAMINZERO:
133        if (!set_start_bits(++tcode, start_bits)) return FALSE;        if (!set_start_bits(++tcode, start_bits, caseless, cd))
134            return FALSE;
135        do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);        do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
136        tcode += 3;        tcode += 3;
137        try_next = TRUE;        try_next = TRUE;
# Line 91  do Line 143  do
143        case OP_MINSTAR:        case OP_MINSTAR:
144        case OP_QUERY:        case OP_QUERY:
145        case OP_MINQUERY:        case OP_MINQUERY:
146        start_bits[tcode[1]/8] |= (1 << (tcode[1]&7));        set_bit(start_bits, tcode[1], caseless, cd);
147        tcode += 2;        tcode += 2;
148        try_next = TRUE;        try_next = TRUE;
149        break;        break;
# Line 100  do Line 152  do
152    
153        case OP_UPTO:        case OP_UPTO:
154        case OP_MINUPTO:        case OP_MINUPTO:
155        start_bits[tcode[3]/8] |= (1 << (tcode[3]&7));        set_bit(start_bits, tcode[3], caseless, cd);
156        tcode += 4;        tcode += 4;
157        try_next = TRUE;        try_next = TRUE;
158        break;        break;
# Line 115  do Line 167  do
167    
168        case OP_PLUS:        case OP_PLUS:
169        case OP_MINPLUS:        case OP_MINPLUS:
170        start_bits[tcode[1]/8] |= (1 << (tcode[1]&7));        set_bit(start_bits, tcode[1], caseless, cd);
171        break;        break;
172    
173        /* Single character type sets the bits and stops */        /* Single character type sets the bits and stops */
174    
175        case OP_NOT_DIGIT:        case OP_NOT_DIGIT:
176        for (c = 0; c < 32; c++) start_bits[c] |= ~pcre_cbits[c+cbit_digit];        for (c = 0; c < 32; c++)
177            start_bits[c] |= ~cd->cbits[c+cbit_digit];
178        break;        break;
179    
180        case OP_DIGIT:        case OP_DIGIT:
181        for (c = 0; c < 32; c++) start_bits[c] |= pcre_cbits[c+cbit_digit];        for (c = 0; c < 32; c++)
182            start_bits[c] |= cd->cbits[c+cbit_digit];
183        break;        break;
184    
185        case OP_NOT_WHITESPACE:        case OP_NOT_WHITESPACE:
186        for (c = 0; c < 32; c++) start_bits[c] |= ~pcre_cbits[c+cbit_space];        for (c = 0; c < 32; c++)
187            start_bits[c] |= ~cd->cbits[c+cbit_space];
188        break;        break;
189    
190        case OP_WHITESPACE:        case OP_WHITESPACE:
191        for (c = 0; c < 32; c++) start_bits[c] |= pcre_cbits[c+cbit_space];        for (c = 0; c < 32; c++)
192            start_bits[c] |= cd->cbits[c+cbit_space];
193        break;        break;
194    
195        case OP_NOT_WORDCHAR:        case OP_NOT_WORDCHAR:
196        for (c = 0; c < 32; c++)        for (c = 0; c < 32; c++)
197          start_bits[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]);          start_bits[c] |= ~(cd->cbits[c] | cd->cbits[c+cbit_word]);
198        break;        break;
199    
200        case OP_WORDCHAR:        case OP_WORDCHAR:
201        for (c = 0; c < 32; c++)        for (c = 0; c < 32; c++)
202          start_bits[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]);          start_bits[c] |= (cd->cbits[c] | cd->cbits[c+cbit_word]);
203        break;        break;
204    
205        /* One or more character type fudges the pointer and restarts, knowing        /* One or more character type fudges the pointer and restarts, knowing
# Line 174  do Line 230  do
230        switch(tcode[1])        switch(tcode[1])
231          {          {
232          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
233          for (c = 0; c < 32; c++) start_bits[c] |= ~pcre_cbits[c+cbit_digit];          for (c = 0; c < 32; c++)
234              start_bits[c] |= ~cd->cbits[c+cbit_digit];
235          break;          break;
236    
237          case OP_DIGIT:          case OP_DIGIT:
238          for (c = 0; c < 32; c++) start_bits[c] |= pcre_cbits[c+cbit_digit];          for (c = 0; c < 32; c++)
239              start_bits[c] |= cd->cbits[c+cbit_digit];
240          break;          break;
241    
242          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
243          for (c = 0; c < 32; c++) start_bits[c] |= ~pcre_cbits[c+cbit_space];          for (c = 0; c < 32; c++)
244              start_bits[c] |= ~cd->cbits[c+cbit_space];
245          break;          break;
246    
247          case OP_WHITESPACE:          case OP_WHITESPACE:
248          for (c = 0; c < 32; c++) start_bits[c] |= pcre_cbits[c+cbit_space];          for (c = 0; c < 32; c++)
249              start_bits[c] |= cd->cbits[c+cbit_space];
250          break;          break;
251    
252          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
253          for (c = 0; c < 32; c++)          for (c = 0; c < 32; c++)
254            start_bits[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]);            start_bits[c] |= ~(cd->cbits[c] | cd->cbits[c+cbit_word]);
255          break;          break;
256    
257          case OP_WORDCHAR:          case OP_WORDCHAR:
258          for (c = 0; c < 32; c++)          for (c = 0; c < 32; c++)
259            start_bits[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]);            start_bits[c] |= (cd->cbits[c] | cd->cbits[c+cbit_word]);
260          break;          break;
261          }          }
262    
# Line 266  Returns:    pointer to a pcre_extra bloc Line 326  Returns:    pointer to a pcre_extra bloc
326  pcre_extra *  pcre_extra *
327  pcre_study(const pcre *external_re, int options, const char **errorptr)  pcre_study(const pcre *external_re, int options, const char **errorptr)
328  {  {
 BOOL caseless;  
329  uschar start_bits[32];  uschar start_bits[32];
330  real_pcre_extra *extra;  real_pcre_extra *extra;
331  const real_pcre *re = (const real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
332    compile_data compile_block;
333    
334  *errorptr = NULL;  *errorptr = NULL;
335    
# Line 285  if ((options & ~PUBLIC_STUDY_OPTIONS) != Line 345  if ((options & ~PUBLIC_STUDY_OPTIONS) !=
345    return NULL;    return NULL;
346    }    }
347    
 /* Caseless can either be from the compiled regex or from options. */  
   
 caseless = ((re->options | options) & PCRE_CASELESS) != 0;  
   
348  /* For an anchored pattern, or an unanchored pattern that has a first char, or a  /* For an anchored pattern, or an unanchored pattern that has a first char, or a
349  multiline pattern that matches only at "line starts", no further processing at  multiline pattern that matches only at "line starts", no further processing at
350  present. */  present. */
# Line 296  present. */ Line 352  present. */
352  if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)  if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
353    return NULL;    return NULL;
354    
355  /* See if we can find a fixed set of initial characters for the pattern. */  /* Set the character tables in the block which is passed around */
356    
357  memset(start_bits, 0, 32 * sizeof(uschar));  compile_block.lcc = re->tables + lcc_offset;
358  if (!set_start_bits(re->code, start_bits)) return NULL;  compile_block.fcc = re->tables + fcc_offset;
359    compile_block.cbits = re->tables + cbits_offset;
360    compile_block.ctypes = re->tables + ctypes_offset;
361    
362  /* If this studying is caseless, scan the created bit map and duplicate the  /* See if we can find a fixed set of initial characters for the pattern. */
 bits for any letters. */  
363    
364  if (caseless)  memset(start_bits, 0, 32 * sizeof(uschar));
365    {  if (!set_start_bits(re->code, start_bits, (re->options & PCRE_CASELESS) != 0,
366    register int c;    &compile_block)) return NULL;
   for (c = 0; c < 256; c++)  
     {  
     if ((start_bits[c/8] & (1 << (c&7))) != 0 &&  
         (pcre_ctypes[c] & ctype_letter) != 0)  
       {  
       int d = pcre_fcc[c];  
       start_bits[d/8] |= (1 << (d&7));  
       }  
     }  
   }  
367    
368  /* Get an "extra" block and put the information therein. */  /* Get an "extra" block and put the information therein. */
369    
# Line 328  if (extra == NULL) Line 375  if (extra == NULL)
375    return NULL;    return NULL;
376    }    }
377    
378  extra->options = PCRE_STUDY_MAPPED | (caseless? PCRE_STUDY_CASELESS : 0);  extra->options = PCRE_STUDY_MAPPED;
379  memcpy(extra->start_bits, start_bits, sizeof(start_bits));  memcpy(extra->start_bits, start_bits, sizeof(start_bits));
380    
381  return (pcre_extra *)extra;  return (pcre_extra *)extra;

Legend:
Removed from v.7  
changed lines
  Added in v.25

  ViewVC Help
Powered by ViewVC 1.1.5