/[pcre]/code/tags/pcre-5.0/study.c
ViewVC logotype

Diff of /code/tags/pcre-5.0/study.c

Parent Directory | Revision Log | View Patch

revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC | revision 63 by nigel, Sat Feb 24 21:40:03 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1997-2001 University of Cambridge             Copyright (c) 1997-2002 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 78  Arguments: Line 78  Arguments:
78    code         points to an expression    code         points to an expression
79    start_bits   points to a 32-byte table, initialized to 0    start_bits   points to a 32-byte table, initialized to 0
80    caseless     the current state of the caseless flag    caseless     the current state of the caseless flag
81      utf8         TRUE if in UTF-8 mode
82    cd           the block with char table pointers    cd           the block with char table pointers
83    
84  Returns:       TRUE if table built, FALSE otherwise  Returns:       TRUE if table built, FALSE otherwise
# Line 85  Returns:       TRUE if table built, FALS Line 86  Returns:       TRUE if table built, FALS
86    
87  static BOOL  static BOOL
88  set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,  set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
89    compile_data *cd)    BOOL utf8, compile_data *cd)
90  {  {
91  register int c;  register int c;
92    
# Line 99  volatile int dummy; Line 100  volatile int dummy;
100    
101  do  do
102    {    {
103    const uschar *tcode = code + 3;    const uschar *tcode = code + 1 + LINK_SIZE;
104    BOOL try_next = TRUE;    BOOL try_next = TRUE;
105    
106    while (try_next)    while (try_next)
# Line 109  do Line 110  do
110    
111      if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)      if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)
112        {        {
113        if (!set_start_bits(tcode, start_bits, caseless, cd))        if (!set_start_bits(tcode, start_bits, caseless, utf8, cd))
114          return FALSE;          return FALSE;
115        try_next = FALSE;        try_next = FALSE;
116        }        }
# Line 119  do Line 120  do
120        default:        default:
121        return FALSE;        return FALSE;
122    
123          /* Skip over callout */
124    
125          case OP_CALLOUT:
126          tcode += 2;
127          break;
128    
129        /* Skip over extended extraction bracket number */        /* Skip over extended extraction bracket number */
130    
131        case OP_BRANUMBER:        case OP_BRANUMBER:
# Line 130  do Line 137  do
137        case OP_ASSERT_NOT:        case OP_ASSERT_NOT:
138        case OP_ASSERTBACK:        case OP_ASSERTBACK:
139        case OP_ASSERTBACK_NOT:        case OP_ASSERTBACK_NOT:
140        do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);        do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
141        tcode += 3;        tcode += 1+LINK_SIZE;
142        break;        break;
143    
144        /* Skip over an option setting, changing the caseless flag */        /* Skip over an option setting, changing the caseless flag */
# Line 145  do Line 152  do
152    
153        case OP_BRAZERO:        case OP_BRAZERO:
154        case OP_BRAMINZERO:        case OP_BRAMINZERO:
155        if (!set_start_bits(++tcode, start_bits, caseless, cd))        if (!set_start_bits(++tcode, start_bits, caseless, utf8, cd))
156          return FALSE;          return FALSE;
157        dummy = 1;        dummy = 1;
158        do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);        do tcode += GET(tcode,1); while (*tcode == OP_ALT);
159        tcode += 3;        tcode += 1+LINK_SIZE;
160        break;        break;
161    
162        /* Single-char * or ? sets the bit and tries the next item */        /* Single-char * or ? sets the bit and tries the next item */
# Line 160  do Line 167  do
167        case OP_MINQUERY:        case OP_MINQUERY:
168        set_bit(start_bits, tcode[1], caseless, cd);        set_bit(start_bits, tcode[1], caseless, cd);
169        tcode += 2;        tcode += 2;
170    #ifdef SUPPORT_UTF8
171          if (utf8) while ((*tcode & 0xc0) == 0x80) tcode++;
172    #endif
173        break;        break;
174    
175        /* Single-char upto sets the bit and tries the next */        /* Single-char upto sets the bit and tries the next */
# Line 168  do Line 178  do
178        case OP_MINUPTO:        case OP_MINUPTO:
179        set_bit(start_bits, tcode[3], caseless, cd);        set_bit(start_bits, tcode[3], caseless, cd);
180        tcode += 4;        tcode += 4;
181    #ifdef SUPPORT_UTF8
182          if (utf8) while ((*tcode & 0xc0) == 0x80) tcode++;
183    #endif
184        break;        break;
185    
186        /* At least one single char sets the bit and stops */        /* At least one single char sets the bit and stops */
# Line 281  do Line 294  do
294        tcode += 2;        tcode += 2;
295        break;        break;
296    
297        /* Character class: set the bits and either carry on or not,        /* Character class where all the information is in a bit map: set the
298        according to the repeat count. */        bits and either carry on or not, according to the repeat count. If it was
299          a negative class, and we are operating with UTF-8 characters, any byte
300          with the top-bit set is a potentially valid starter because it may start
301          a character with a value > 255. (This is sub-optimal in that the
302          character may be in the range 128-255, and those characters might be
303          unwanted, but that's as far as we go for the moment.) */
304    
305          case OP_NCLASS:
306          if (utf8) memset(start_bits+16, 0xff, 16);
307          /* Fall through */
308    
309        case OP_CLASS:        case OP_CLASS:
310          {          {
# Line 309  do Line 331  do
331            break;            break;
332            }            }
333          }          }
334        break; /* End of class handling */        break; /* End of bitmap class handling */
335    
336        }      /* End of switch */        }      /* End of switch */
337      }        /* End of try_next loop */      }        /* End of try_next loop */
338    
339    code += (code[1] << 8) + code[2];   /* Advance to next branch */    code += GET(code, 1);   /* Advance to next branch */
340    }    }
341  while (*code == OP_ALT);  while (*code == OP_ALT);
342  return TRUE;  return TRUE;
# Line 336  Arguments: Line 358  Arguments:
358    errorptr  points to where to place error messages;    errorptr  points to where to place error messages;
359              set NULL unless error              set NULL unless error
360    
361  Returns:    pointer to a pcre_extra block,  Returns:    pointer to a pcre_extra block, with study_data filled in and the
362                  appropriate flag set;
363              NULL on error or if no optimization possible              NULL on error or if no optimization possible
364  */  */
365    
# Line 344  pcre_extra * Line 367  pcre_extra *
367  pcre_study(const pcre *external_re, int options, const char **errorptr)  pcre_study(const pcre *external_re, int options, const char **errorptr)
368  {  {
369  uschar start_bits[32];  uschar start_bits[32];
370  real_pcre_extra *extra;  pcre_extra *extra;
371    pcre_study_data *study;
372  const real_pcre *re = (const real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
373    uschar *code = (uschar *)re + sizeof(real_pcre) +
374      (re->name_count * re->name_entry_size);
375  compile_data compile_block;  compile_data compile_block;
376    
377  *errorptr = NULL;  *errorptr = NULL;
# Line 362  if ((options & ~PUBLIC_STUDY_OPTIONS) != Line 388  if ((options & ~PUBLIC_STUDY_OPTIONS) !=
388    return NULL;    return NULL;
389    }    }
390    
391  /* For an anchored pattern, or an unchored pattern that has a first char, or a  /* For an anchored pattern, or an unanchored pattern that has a first char, or
392  multiline pattern that matches only at "line starts", no further processing at  a multiline pattern that matches only at "line starts", no further processing
393  present. */  at present. */
394    
395  if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)  if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
396    return NULL;    return NULL;
# Line 379  compile_block.ctypes = re->tables + ctyp Line 405  compile_block.ctypes = re->tables + ctyp
405  /* See if we can find a fixed set of initial characters for the pattern. */  /* See if we can find a fixed set of initial characters for the pattern. */
406    
407  memset(start_bits, 0, 32 * sizeof(uschar));  memset(start_bits, 0, 32 * sizeof(uschar));
408  if (!set_start_bits(re->code, start_bits, (re->options & PCRE_CASELESS) != 0,  if (!set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,
409    &compile_block)) return NULL;    (re->options & PCRE_UTF8) != 0, &compile_block)) return NULL;
410    
411  /* Get an "extra" block and put the information therein. */  /* Get a pcre_extra block and a pcre_study_data block. The study data is put in
412    the latter, which is pointed to by the former, which may also get additional
413    data set later by the calling program. At the moment, the size of
414    pcre_study_data is fixed. We nevertheless save it in a field for returning via
415    the pcre_fullinfo() function so that if it becomes variable in the future, we
416    don't have to change that code. */
417    
418  extra = (real_pcre_extra *)(pcre_malloc)(sizeof(real_pcre_extra));  extra = (pcre_extra *)(pcre_malloc)
419      (sizeof(pcre_extra) + sizeof(pcre_study_data));
420    
421  if (extra == NULL)  if (extra == NULL)
422    {    {
# Line 392  if (extra == NULL) Line 424  if (extra == NULL)
424    return NULL;    return NULL;
425    }    }
426    
427  extra->options = PCRE_STUDY_MAPPED;  study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra));
428  memcpy(extra->start_bits, start_bits, sizeof(start_bits));  extra->flags = PCRE_EXTRA_STUDY_DATA;
429    extra->study_data = study;
430    
431    study->size = sizeof(pcre_study_data);
432    study->options = PCRE_STUDY_MAPPED;
433    memcpy(study->start_bits, start_bits, sizeof(start_bits));
434    
435  return (pcre_extra *)extra;  return extra;
436  }  }
437    
438  /* End of study.c */  /* End of study.c */

Legend:
Removed from v.53  
changed lines
  Added in v.63

  ViewVC Help
Powered by ViewVC 1.1.5