/[pcre]/code/trunk/pcre.c
ViewVC logotype

Diff of /code/trunk/pcre.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 5 by nigel, Sat Feb 24 21:38:05 2007 UTC revision 37 by nigel, Sat Feb 24 21:39:09 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1997 University of Cambridge             Copyright (c) 1997-1999 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 25  restrictions: Line 25  restrictions:
25    
26  3. Altered versions must be plainly marked as such, and must not be  3. Altered versions must be plainly marked as such, and must not be
27     misrepresented as being the original software.     misrepresented as being the original software.
28    
29    4. If PCRE is embedded in any software that is released under the GNU
30       General Purpose Licence (GPL), then the terms of that licence shall
31       supersede any condition above with which it is incompatible.
32  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
33  */  */
34    
# Line 33  restrictions: Line 37  restrictions:
37    
38  /* #define DEBUG */  /* #define DEBUG */
39    
40    /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
41    inline, and there are *still* stupid compilers about that don't like indented
42    pre-processor statements. I suppose it's only been 10 years... */
43    
44    #ifdef DEBUG
45    #define DPRINTF(p) printf p
46    #else
47    #define DPRINTF(p) /*nothing*/
48    #endif
49    
50  /* Include the internals header, which itself includes Standard C headers plus  /* Include the internals header, which itself includes Standard C headers plus
51  the external pcre header. */  the external pcre header. */
# Line 40  the external pcre header. */ Line 53  the external pcre header. */
53  #include "internal.h"  #include "internal.h"
54    
55    
56    /* Allow compilation as C++ source code, should anybody want to do that. */
57    
58    #ifdef __cplusplus
59    #define class pcre_class
60    #endif
61    
62    
63    /* Number of items on the nested bracket stacks at compile time. This should
64    not be set greater than 200. */
65    
66    #define BRASTACK_SIZE 200
67    
68    
69  /* Min and max values for the common repeats; for the maxima, 0 => infinity */  /* Min and max values for the common repeats; for the maxima, 0 => infinity */
70    
71  static char rep_min[] = { 0, 0, 1, 1, 0, 0 };  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
72  static char rep_max[] = { 0, 0, 0, 0, 1, 1 };  static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
73    
74  /* Text forms of OP_ values and things, for debugging */  /* Text forms of OP_ values and things, for debugging (not all used) */
75    
76  #ifdef DEBUG  #ifdef DEBUG
77  static char *OP_names[] = { "End", "\\A", "\\B", "\\b", "\\D", "\\d",  static const char *OP_names[] = {
78    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",    "End", "\\A", "\\B", "\\b", "\\D", "\\d",
79    "not",    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
80      "Opt", "^", "$", "Any", "chars", "not",
81    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
82    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
83    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
84    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
85    "class", "Ref",    "class", "Ref",
86    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
87      "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
88    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
89  };  };
90  #endif  #endif
# Line 66  are simple data values; negative values Line 94  are simple data values; negative values
94  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
95  is invalid. */  is invalid. */
96    
97  static short int escapes[] = {  static const short int escapes[] = {
98      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
99      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
100    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */
# Line 76  static short int escapes[] = { Line 104  static short int escapes[] = {
104    '`',      7, -ESC_b,      0, -ESC_d,     27,   '\f',      0,   /* ` - g */    '`',      7, -ESC_b,      0, -ESC_d,     27,   '\f',      0,   /* ` - g */
105      0,      0,      0,      0,      0,      0,   '\n',      0,   /* h - o */      0,      0,      0,      0,      0,      0,   '\n',      0,   /* h - o */
106      0,      0,   '\r', -ESC_s,   '\t',      0,      0, -ESC_w,   /* p - w */      0,      0,   '\r', -ESC_s,   '\t',      0,      0, -ESC_w,   /* p - w */
107      0,      0,      0                                            /* x - z */      0,      0, -ESC_z                                            /* x - z */
108  };  };
109    
110  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
111    
112  static BOOL compile_regex(int, int *,uschar **,uschar **,char **);  static BOOL
113      compile_regex(int, int, int *, uschar **, const uschar **, const char **,
114  /* Structure for passing "static" information around between the functions      BOOL, int, int *, int *, compile_data *);
 doing the matching, so that they are thread-safe. */  
   
 typedef struct match_data {  
   int    errorcode;             /* As it says */  
   int   *offset_vector;         /* Offset vector */  
   int    offset_end;            /* One past the end */  
   BOOL   offset_overflow;       /* Set if too many extractions */  
   BOOL   caseless;              /* Case-independent flag */  
   BOOL   runtime_caseless;      /* Caseless forced at run time */  
   BOOL   multiline;             /* Multiline flag */  
   BOOL   notbol;                /* NOTBOL flag */  
   BOOL   noteol;                /* NOTEOL flag */  
   BOOL   dotall;                /* Dot matches any char */  
   BOOL   endonly;               /* Dollar not before final \n */  
   uschar *start_subject;        /* Start of the subject string */  
   uschar *end_subject;          /* End of the subject string */  
   jmp_buf fail_env;             /* Environment for longjump() break out */  
   uschar *end_match_ptr;        /* Subject position at end match */  
   int     end_offset_top;       /* Highwater mark at end of match */  
 } match_data;  
115    
116    
117    
# Line 123  void  (*pcre_free)(void *) = free; Line 131  void  (*pcre_free)(void *) = free;
131    
132    
133  /*************************************************  /*************************************************
134    *             Default character tables           *
135    *************************************************/
136    
137    /* A default set of character tables is included in the PCRE binary. Its source
138    is built by the maketables auxiliary program, which uses the default C ctypes
139    functions, and put in the file chartables.c. These tables are used by PCRE
140    whenever the caller of pcre_compile() does not provide an alternate set of
141    tables. */
142    
143    #include "chartables.c"
144    
145    
146    
147    /*************************************************
148  *          Return version string                 *  *          Return version string                 *
149  *************************************************/  *************************************************/
150    
151  char *  const char *
152  pcre_version(void)  pcre_version(void)
153  {  {
154  return PCRE_VERSION;  return PCRE_VERSION;
# Line 140  return PCRE_VERSION; Line 162  return PCRE_VERSION;
162  *************************************************/  *************************************************/
163    
164  /* This function picks potentially useful data out of the private  /* This function picks potentially useful data out of the private
165  structure.  structure. The public options are passed back in an int - though the
166    re->options field has been expanded to a long int, all the public options
167    are at the low end of it, and so even on 16-bit systems this will still be OK.
168    Therefore, I haven't changed the API for pcre_info().
169    
170  Arguments:  Arguments:
171    external_re   points to compiled code    external_re   points to compiled code
# Line 156  Returns:        number of identifying ex Line 181  Returns:        number of identifying ex
181  int  int
182  pcre_info(const pcre *external_re, int *optptr, int *first_char)  pcre_info(const pcre *external_re, int *optptr, int *first_char)
183  {  {
184  real_pcre *re = (real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
185  if (re == NULL) return PCRE_ERROR_NULL;  if (re == NULL) return PCRE_ERROR_NULL;
186  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
187  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);  if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);
188  if (first_char != NULL)  if (first_char != NULL)
189    *first_char = ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :    *first_char = ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :
190       ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;       ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
# Line 186  Arguments: Line 211  Arguments:
211  Returns:     nothing  Returns:     nothing
212  */  */
213    
214  static pchars(uschar *p, int length, BOOL is_subject, match_data *md)  static void
215    pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
216  {  {
217  int c;  int c;
218  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
# Line 199  while (length-- > 0) Line 225  while (length-- > 0)
225    
226    
227  /*************************************************  /*************************************************
 *         Check subpattern for empty operand     *  
 *************************************************/  
   
 /* This function checks a bracketed subpattern to see if any of the paths  
 through it could match an empty string. This is used to diagnose an error if  
 such a subpattern is followed by a quantifier with an unlimited upper bound.  
   
 Argument:  
   code      points to the opening bracket  
   
 Returns:    TRUE or FALSE  
 */  
   
 static BOOL  
 could_be_empty(uschar *code)  
 {  
 do {  
   uschar *cc = code + 3;  
   
   /* Scan along the opcodes for this branch; as soon as we find something  
   that matches a non-empty string, break out and advance to test the next  
   branch. If we get to the end of the branch, return TRUE for the whole  
   sub-expression. */  
   
   for (;;)  
     {  
     /* Test an embedded subpattern; if it could not be empty, break the  
     loop. Otherwise carry on in the branch. */  
   
     if ((int)(*cc) >= OP_BRA || (int)(*cc) == OP_ONCE)  
       {  
       if (!could_be_empty(cc)) break;  
       do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);  
       cc += 3;  
       }  
   
     else switch (*cc)  
       {  
       /* Reached end of a branch: the subpattern may match the empty string */  
   
       case OP_ALT:  
       case OP_KET:  
       case OP_KETRMAX:  
       case OP_KETRMIN:  
       return TRUE;  
   
       /* Skip over assertive subpatterns */  
   
       case OP_ASSERT:  
       case OP_ASSERT_NOT:  
       do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);  
       cc += 3;  
       break;  
   
       /* Skip over things that don't match chars */  
   
       case OP_SOD:  
       case OP_EOD:  
       case OP_CIRC:  
       case OP_DOLL:  
       case OP_BRAZERO:  
       case OP_BRAMINZERO:  
       case OP_NOT_WORD_BOUNDARY:  
       case OP_WORD_BOUNDARY:  
       cc++;  
       break;  
   
       /* Skip over simple repeats with zero lower bound */  
   
       case OP_STAR:  
       case OP_MINSTAR:  
       case OP_QUERY:  
       case OP_MINQUERY:  
       case OP_NOTSTAR:  
       case OP_NOTMINSTAR:  
       case OP_NOTQUERY:  
       case OP_NOTMINQUERY:  
       case OP_TYPESTAR:  
       case OP_TYPEMINSTAR:  
       case OP_TYPEQUERY:  
       case OP_TYPEMINQUERY:  
       cc += 2;  
       break;  
   
       /* Skip over UPTOs (lower bound is zero) */  
   
       case OP_UPTO:  
       case OP_MINUPTO:  
       case OP_TYPEUPTO:  
       case OP_TYPEMINUPTO:  
       cc += 4;  
       break;  
   
       /* Check a class or a back reference for a zero minimum */  
   
       case OP_CLASS:  
       case OP_REF:  
       cc += (*cc == OP_REF)? 2 : 33;  
   
       switch (*cc)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         cc++;  
         break;  
   
         case OP_CRRANGE:  
         case OP_CRMINRANGE:  
         if ((cc[1] << 8) + cc[2] != 0) goto NEXT_BRANCH;  
         cc += 3;  
         break;  
   
         default:  
         goto NEXT_BRANCH;  
         }  
       break;  
   
       /* Anything else matches at least one character */  
   
       default:  
       goto NEXT_BRANCH;  
       }  
     }  
   
   NEXT_BRANCH:  
   code += (code[1] << 8) + code[2];  
   }  
 while (*code == OP_ALT);  
   
 /* No branches match the empty string */  
   
 return FALSE;  
 }  
   
   
   
 /*************************************************  
228  *            Handle escapes                      *  *            Handle escapes                      *
229  *************************************************/  *************************************************/
230    
# Line 353  Arguments: Line 240  Arguments:
240    bracount   number of previous extracting brackets    bracount   number of previous extracting brackets
241    options    the options bits    options    the options bits
242    isclass    TRUE if inside a character class    isclass    TRUE if inside a character class
243      cd         pointer to char tables block
244    
245  Returns:     zero or positive => a data character  Returns:     zero or positive => a data character
246               negative => a special escape sequence               negative => a special escape sequence
# Line 360  Returns:     zero or positive => a data Line 248  Returns:     zero or positive => a data
248  */  */
249    
250  static int  static int
251  check_escape(uschar **ptrptr, char **errorptr, int bracount, int options,  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
252    BOOL isclass)    int options, BOOL isclass, compile_data *cd)
253  {  {
254  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
255  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */
256  int i;  int i;
257    
# Line 382  else if ((i = escapes[c - '0']) != 0) c Line 270  else if ((i = escapes[c - '0']) != 0) c
270    
271  else  else
272    {    {
273    uschar *oldptr;    const uschar *oldptr;
274    switch (c)    switch (c)
275      {      {
276      /* The handling of escape sequences consisting of a string of digits      /* The handling of escape sequences consisting of a string of digits
# Line 404  else Line 292  else
292        {        {
293        oldptr = ptr;        oldptr = ptr;
294        c -= '0';        c -= '0';
295        while ((pcre_ctypes[ptr[1]] & ctype_digit) != 0)        while ((cd->ctypes[ptr[1]] & ctype_digit) != 0)
296          c = c * 10 + *(++ptr) - '0';          c = c * 10 + *(++ptr) - '0';
297        if (c < 10 || c <= bracount)        if (c < 10 || c <= bracount)
298          {          {
# Line 430  else Line 318  else
318    
319      case '0':      case '0':
320      c -= '0';      c -= '0';
321      while(i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_digit) != 0 &&      while(i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 &&
322        ptr[1] != '8' && ptr[1] != '9')        ptr[1] != '8' && ptr[1] != '9')
323          c = c * 8 + *(++ptr) - '0';          c = c * 8 + *(++ptr) - '0';
324      break;      break;
# Line 439  else Line 327  else
327    
328      case 'x':      case 'x':
329      c = 0;      c = 0;
330      while (i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_xdigit) != 0)      while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0)
331        {        {
332        ptr++;        ptr++;
333        c = c * 16 + pcre_lcc[*ptr] -        c = c * 16 + cd->lcc[*ptr] -
334          (((pcre_ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');          (((cd->ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');
335        }        }
336      break;      break;
337    
# Line 457  else Line 345  else
345    
346      /* A letter is upper-cased; then the 0x40 bit is flipped */      /* A letter is upper-cased; then the 0x40 bit is flipped */
347    
348      if (c >= 'a' && c <= 'z') c = pcre_fcc[c];      if (c >= 'a' && c <= 'z') c = cd->fcc[c];
349      c ^= 0x40;      c ^= 0x40;
350      break;      break;
351    
352      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
353      other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,      other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,
354      for Perl compatibility, it is a literal. */      for Perl compatibility, it is a literal. This code looks a bit odd, but
355        there used to be some cases other than the default, and there may be again
356        in future, so I haven't "optimized" it. */
357    
358      default:      default:
359      if ((options & PCRE_EXTRA) != 0) switch(c)      if ((options & PCRE_EXTRA) != 0) switch(c)
360        {        {
       case 'X':  
       c = -ESC_X;      /* This could be a lookup if it ever got into Perl */  
       break;  
   
361        default:        default:
362        *errorptr = ERR3;        *errorptr = ERR3;
363        break;        break;
# Line 497  where the ddds are digits. Line 383  where the ddds are digits.
383    
384  Arguments:  Arguments:
385    p         pointer to the first char after '{'    p         pointer to the first char after '{'
386      cd        pointer to char tables block
387    
388  Returns:    TRUE or FALSE  Returns:    TRUE or FALSE
389  */  */
390    
391  static BOOL  static BOOL
392  is_counted_repeat(uschar *p)  is_counted_repeat(const uschar *p, compile_data *cd)
393  {  {
394  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
395  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
396  if (*p == '}') return TRUE;  if (*p == '}') return TRUE;
397    
398  if (*p++ != ',') return FALSE;  if (*p++ != ',') return FALSE;
399  if (*p == '}') return TRUE;  if (*p == '}') return TRUE;
400    
401  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
402  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
403  return (*p == '}');  return (*p == '}');
404  }  }
405    
# Line 532  Arguments: Line 419  Arguments:
419    maxp       pointer to int for max    maxp       pointer to int for max
420               returned as -1 if no max               returned as -1 if no max
421    errorptr   points to pointer to error message    errorptr   points to pointer to error message
422      cd         pointer to character tables block
423    
424  Returns:     pointer to '}' on success;  Returns:     pointer to '}' on success;
425               current ptr on error, with errorptr set               current ptr on error, with errorptr set
426  */  */
427    
428  static uschar *  static const uschar *
429  read_repeat_counts(uschar *p, int *minp, int *maxp, char **errorptr)  read_repeat_counts(const uschar *p, int *minp, int *maxp,
430      const char **errorptr, compile_data *cd)
431  {  {
432  int min = 0;  int min = 0;
433  int max = -1;  int max = -1;
434    
435  while ((pcre_ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';  while ((cd->ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';
436    
437  if (*p == '}') max = min; else  if (*p == '}') max = min; else
438    {    {
439    if (*(++p) != '}')    if (*(++p) != '}')
440      {      {
441      max = 0;      max = 0;
442      while((pcre_ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';      while((cd->ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';
443      if (max < min)      if (max < min)
444        {        {
445        *errorptr = ERR4;        *errorptr = ERR4;
# Line 575  return p; Line 464  return p;
464    
465    
466  /*************************************************  /*************************************************
467    *        Find the fixed length of a pattern      *
468    *************************************************/
469    
470    /* Scan a pattern and compute the fixed length of subject that will match it,
471    if the length is fixed. This is needed for dealing with backward assertions.
472    
473    Arguments:
474      code     points to the start of the pattern (the bracket)
475    
476    Returns:   the fixed length, or -1 if there is no fixed length
477    */
478    
479    static int
480    find_fixedlength(uschar *code)
481    {
482    int length = -1;
483    
484    register int branchlength = 0;
485    register uschar *cc = code + 3;
486    
487    /* Scan along the opcodes for this branch. If we get to the end of the
488    branch, check the length against that of the other branches. */
489    
490    for (;;)
491      {
492      int d;
493      register int op = *cc;
494      if (op >= OP_BRA) op = OP_BRA;
495    
496      switch (op)
497        {
498        case OP_BRA:
499        case OP_ONCE:
500        case OP_COND:
501        d = find_fixedlength(cc);
502        if (d < 0) return -1;
503        branchlength += d;
504        do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
505        cc += 3;
506        break;
507    
508        /* Reached end of a branch; if it's a ket it is the end of a nested
509        call. If it's ALT it is an alternation in a nested call. If it is
510        END it's the end of the outer call. All can be handled by the same code. */
511    
512        case OP_ALT:
513        case OP_KET:
514        case OP_KETRMAX:
515        case OP_KETRMIN:
516        case OP_END:
517        if (length < 0) length = branchlength;
518          else if (length != branchlength) return -1;
519        if (*cc != OP_ALT) return length;
520        cc += 3;
521        branchlength = 0;
522        break;
523    
524        /* Skip over assertive subpatterns */
525    
526        case OP_ASSERT:
527        case OP_ASSERT_NOT:
528        case OP_ASSERTBACK:
529        case OP_ASSERTBACK_NOT:
530        do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
531        cc += 3;
532        break;
533    
534        /* Skip over things that don't match chars */
535    
536        case OP_REVERSE:
537        cc++;
538        /* Fall through */
539    
540        case OP_CREF:
541        case OP_OPT:
542        cc++;
543        /* Fall through */
544    
545        case OP_SOD:
546        case OP_EOD:
547        case OP_EODN:
548        case OP_CIRC:
549        case OP_DOLL:
550        case OP_NOT_WORD_BOUNDARY:
551        case OP_WORD_BOUNDARY:
552        cc++;
553        break;
554    
555        /* Handle char strings */
556    
557        case OP_CHARS:
558        branchlength += *(++cc);
559        cc += *cc + 1;
560        break;
561    
562        /* Handle exact repetitions */
563    
564        case OP_EXACT:
565        case OP_TYPEEXACT:
566        branchlength += (cc[1] << 8) + cc[2];
567        cc += 4;
568        break;
569    
570        /* Handle single-char matchers */
571    
572        case OP_NOT_DIGIT:
573        case OP_DIGIT:
574        case OP_NOT_WHITESPACE:
575        case OP_WHITESPACE:
576        case OP_NOT_WORDCHAR:
577        case OP_WORDCHAR:
578        case OP_ANY:
579        branchlength++;
580        cc++;
581        break;
582    
583    
584        /* Check a class for variable quantification */
585    
586        case OP_CLASS:
587        cc += (*cc == OP_REF)? 2 : 33;
588    
589        switch (*cc)
590          {
591          case OP_CRSTAR:
592          case OP_CRMINSTAR:
593          case OP_CRQUERY:
594          case OP_CRMINQUERY:
595          return -1;
596    
597          case OP_CRRANGE:
598          case OP_CRMINRANGE:
599          if ((cc[1] << 8) + cc[2] != (cc[3] << 8) + cc[4]) return -1;
600          branchlength += (cc[1] << 8) + cc[2];
601          cc += 5;
602          break;
603    
604          default:
605          branchlength++;
606          }
607        break;
608    
609        /* Anything else is variable length */
610    
611        default:
612        return -1;
613        }
614      }
615    /* Control never gets here */
616    }
617    
618    
619    
620    
621    /*************************************************
622  *           Compile one branch                   *  *           Compile one branch                   *
623  *************************************************/  *************************************************/
624    
625  /* Scan the pattern, compiling it into the code vector.  /* Scan the pattern, compiling it into the code vector.
626    
627  Arguments:  Arguments:
628    options    the option bits    options      the option bits
629    bracket    points to number of brackets used    brackets     points to number of brackets used
630    code       points to the pointer to the current code point    code         points to the pointer to the current code point
631    ptrptr     points to the current pattern pointer    ptrptr       points to the current pattern pointer
632    errorptr   points to pointer to error message    errorptr     points to pointer to error message
633      optchanged   set to the value of the last OP_OPT item compiled
634      reqchar      set to the last literal character required, else -1
635      countlits    set to count of mandatory literal characters
636      cd           contains pointers to tables
637    
638  Returns:     TRUE on success  Returns:       TRUE on success
639               FALSE, with *errorptr set on error                 FALSE, with *errorptr set on error
640  */  */
641    
642  static BOOL  static BOOL
643  compile_branch(int options, int *brackets, uschar **codeptr, uschar **ptrptr,  compile_branch(int options, int *brackets, uschar **codeptr,
644    char **errorptr)    const uschar **ptrptr, const char **errorptr, int *optchanged,
645      int *reqchar, int *countlits, compile_data *cd)
646  {  {
647  int repeat_type, op_type;  int repeat_type, op_type;
648  int repeat_min, repeat_max;  int repeat_min, repeat_max;
649  int bravalue, length;  int bravalue, length;
650    int greedy_default, greedy_non_default;
651    int prevreqchar;
652    int condcount = 0;
653    int subcountlits = 0;
654  register int c;  register int c;
655  register uschar *code = *codeptr;  register uschar *code = *codeptr;
656  uschar *ptr = *ptrptr;  uschar *tempcode;
657    const uschar *ptr = *ptrptr;
658    const uschar *tempptr;
659  uschar *previous = NULL;  uschar *previous = NULL;
 uschar *oldptr;  
660  uschar class[32];  uschar class[32];
661    
662    /* Set up the default and non-default settings for greediness */
663    
664    greedy_default = ((options & PCRE_UNGREEDY) != 0);
665    greedy_non_default = greedy_default ^ 1;
666    
667    /* Initialize no required char, and count of literals */
668    
669    *reqchar = prevreqchar = -1;
670    *countlits = 0;
671    
672  /* Switch on next character until the end of the branch */  /* Switch on next character until the end of the branch */
673    
674  for (;; ptr++)  for (;; ptr++)
675    {    {
676    BOOL negate_class;    BOOL negate_class;
677    int  class_charcount;    int class_charcount;
678    int  class_lastchar;    int class_lastchar;
679      int newoptions;
680      int condref;
681      int subreqchar;
682    
683    c = *ptr;    c = *ptr;
684    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
685      {      {
686      if ((pcre_ctypes[c] & ctype_space) != 0) continue;      if ((cd->ctypes[c] & ctype_space) != 0) continue;
687      if (c == '#')      if (c == '#')
688        {        {
689        while ((c = *(++ptr)) != 0 && c != '\n');        while ((c = *(++ptr)) != 0 && c != '\n');
# Line 661  for (;; ptr++) Line 728  for (;; ptr++)
728      previous = code;      previous = code;
729      *code++ = OP_CLASS;      *code++ = OP_CLASS;
730    
731      /* If the first character is '^', set the negation flag */      /* If the first character is '^', set the negation flag and skip it. */
732    
733      if ((c = *(++ptr)) == '^')      if ((c = *(++ptr)) == '^')
734        {        {
# Line 697  for (;; ptr++) Line 764  for (;; ptr++)
764        /* Backslash may introduce a single character, or it may introduce one        /* Backslash may introduce a single character, or it may introduce one
765        of the specials, which just set a flag. Escaped items are checked for        of the specials, which just set a flag. Escaped items are checked for
766        validity in the pre-compiling pass. The sequence \b is a special case.        validity in the pre-compiling pass. The sequence \b is a special case.
767        Inside a class (and only there) it is treated as backslash. Elsewhere        Inside a class (and only there) it is treated as backspace. Elsewhere
768        it marks a word boundary. Other escapes have preset maps ready to        it marks a word boundary. Other escapes have preset maps ready to
769        or into the one we are building. We assume they have more than one        or into the one we are building. We assume they have more than one
770        character in them, so set class_charcount bigger than one. */        character in them, so set class_charcount bigger than one. */
771    
772        if (c == '\\')        if (c == '\\')
773          {          {
774          c = check_escape(&ptr, errorptr, *brackets, options, TRUE);          c = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
775          if (-c == ESC_b) c = '\b';          if (-c == ESC_b) c = '\b';
776          else if (c < 0)          else if (c < 0)
777            {            {
778              register const uschar *cbits = cd->cbits;
779            class_charcount = 10;            class_charcount = 10;
780            switch (-c)            switch (-c)
781              {              {
782              case ESC_d:              case ESC_d:
783              for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_digit];              for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_digit];
784              continue;              continue;
785    
786              case ESC_D:              case ESC_D:
787              for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_digit];              for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_digit];
788              continue;              continue;
789    
790              case ESC_w:              case ESC_w:
791              for (c = 0; c < 32; c++)              for (c = 0; c < 32; c++)
792                class[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]);                class[c] |= (cbits[c+cbit_digit] | cbits[c+cbit_word]);
793              continue;              continue;
794    
795              case ESC_W:              case ESC_W:
796              for (c = 0; c < 32; c++)              for (c = 0; c < 32; c++)
797                class[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]);                class[c] |= ~(cbits[c+cbit_digit] | cbits[c+cbit_word]);
798              continue;              continue;
799    
800              case ESC_s:              case ESC_s:
801              for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_space];              for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_space];
802              continue;              continue;
803    
804              case ESC_S:              case ESC_S:
805              for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_space];              for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_space];
806              continue;              continue;
807    
808              default:              default:
# Line 766  for (;; ptr++) Line 834  for (;; ptr++)
834    
835          if (d == '\\')          if (d == '\\')
836            {            {
837            d = check_escape(&ptr, errorptr, *brackets, options, TRUE);            d = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
838            if (d < 0)            if (d < 0)
839              {              {
840              if (d == -ESC_b) d = '\b'; else              if (d == -ESC_b) d = '\b'; else
# Line 788  for (;; ptr++) Line 856  for (;; ptr++)
856            class[c/8] |= (1 << (c&7));            class[c/8] |= (1 << (c&7));
857            if ((options & PCRE_CASELESS) != 0)            if ((options & PCRE_CASELESS) != 0)
858              {              {
859              int uc = pcre_fcc[c];           /* flip case */              int uc = cd->fcc[c];           /* flip case */
860              class[uc/8] |= (1 << (uc&7));              class[uc/8] |= (1 << (uc&7));
861              }              }
862            class_charcount++;                /* in case a one-char range */            class_charcount++;                /* in case a one-char range */
# Line 803  for (;; ptr++) Line 871  for (;; ptr++)
871        class [c/8] |= (1 << (c&7));        class [c/8] |= (1 << (c&7));
872        if ((options & PCRE_CASELESS) != 0)        if ((options & PCRE_CASELESS) != 0)
873          {          {
874          c = pcre_fcc[c];   /* flip case */          c = cd->fcc[c];   /* flip case */
875          class[c/8] |= (1 << (c&7));          class[c/8] |= (1 << (c&7));
876          }          }
877        class_charcount++;        class_charcount++;
# Line 850  for (;; ptr++) Line 918  for (;; ptr++)
918      /* Various kinds of repeat */      /* Various kinds of repeat */
919    
920      case '{':      case '{':
921      if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;      if (!is_counted_repeat(ptr+1, cd)) goto NORMAL_CHAR;
922      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr);      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr, cd);
923      if (*errorptr != NULL) goto FAILED;      if (*errorptr != NULL) goto FAILED;
924      goto REPEAT;      goto REPEAT;
925    
# Line 876  for (;; ptr++) Line 944  for (;; ptr++)
944        goto FAILED;        goto FAILED;
945        }        }
946    
947      /* If the next character is '?' this is a minimizing repeat. Advance to the      /* If the next character is '?' this is a minimizing repeat, by default,
948        but if PCRE_UNGREEDY is set, it works the other way round. Advance to the
949      next character. */      next character. */
950    
951      if (ptr[1] == '?') { repeat_type = 1; ptr++; } else repeat_type = 0;      if (ptr[1] == '?')
952          { repeat_type = greedy_non_default; ptr++; }
953      /* If the maximum is zero then the minimum must also be zero; Perl allows      else repeat_type = greedy_default;
     this case, so we do too - by simply omitting the item altogether. */  
   
     if (repeat_max == 0) code = previous;  
954    
955      /* If previous was a string of characters, chop off the last one and use it      /* If previous was a string of characters, chop off the last one and use it
956      as the subject of the repeat. If there was only one character, we can      as the subject of the repeat. If there was only one character, we can
957      abolish the previous item altogether. */      abolish the previous item altogether. A repeat with a zero minimum wipes
958        out any reqchar setting, backing up to the previous value. We must also
959        adjust the countlits value. */
960    
961      else if (*previous == OP_CHARS)      if (*previous == OP_CHARS)
962        {        {
963        int len = previous[1];        int len = previous[1];
964    
965          if (repeat_min == 0) *reqchar = prevreqchar;
966          *countlits += repeat_min - 1;
967    
968        if (len == 1)        if (len == 1)
969          {          {
970          c = previous[2];          c = previous[2];
# Line 924  for (;; ptr++) Line 996  for (;; ptr++)
996      create a suitable repeat item. The code is shared with single-character      create a suitable repeat item. The code is shared with single-character
997      repeats by adding a suitable offset into repeat_type. */      repeats by adding a suitable offset into repeat_type. */
998    
999      else if ((int)*previous < OP_EOD || *previous == OP_ANY)      else if ((int)*previous < OP_EODN || *previous == OP_ANY)
1000        {        {
1001        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */
1002        c = *previous;        c = *previous;
1003        code = previous;        code = previous;
1004    
1005        OUTPUT_SINGLE_REPEAT:        OUTPUT_SINGLE_REPEAT:
1006        repeat_type += op_type;      /* Combine both values for many cases */  
1007          /* If the maximum is zero then the minimum must also be zero; Perl allows
1008          this case, so we do too - by simply omitting the item altogether. */
1009    
1010          if (repeat_max == 0) goto END_REPEAT;
1011    
1012          /* Combine the op_type with the repeat_type */
1013    
1014          repeat_type += op_type;
1015    
1016        /* A minimum of zero is handled either as the special case * or ?, or as        /* A minimum of zero is handled either as the special case * or ?, or as
1017        an UPTO, with the maximum given. */        an UPTO, with the maximum given. */
# Line 968  for (;; ptr++) Line 1048  for (;; ptr++)
1048          /* If the minimum is 1 and the previous item was a character string,          /* If the minimum is 1 and the previous item was a character string,
1049          we either have to put back the item that got cancelled if the string          we either have to put back the item that got cancelled if the string
1050          length was 1, or add the character back onto the end of a longer          length was 1, or add the character back onto the end of a longer
1051          string. For a character type nothing need be done; it will just get put          string. For a character type nothing need be done; it will just get
1052          back naturally. */          put back naturally. Note that the final character is always going to
1053            get added below. */
1054    
1055          else if (*previous == OP_CHARS)          else if (*previous == OP_CHARS)
1056            {            {
1057            if (code == previous) code += 2; else previous[1]++;            if (code == previous) code += 2; else previous[1]++;
1058            }            }
1059    
1060          /* Insert an UPTO if the max is greater than the min. */          /*  For a single negated character we also have to put back the
1061            item that got cancelled. */
1062    
1063            else if (*previous == OP_NOT) code++;
1064    
1065            /* If the maximum is unlimited, insert an OP_STAR. */
1066    
1067          if (repeat_max != repeat_min)          if (repeat_max < 0)
1068              {
1069              *code++ = c;
1070              *code++ = OP_STAR + repeat_type;
1071              }
1072    
1073            /* Else insert an UPTO if the max is greater than the min. */
1074    
1075            else if (repeat_max != repeat_min)
1076            {            {
1077            *code++ = c;            *code++ = c;
1078            repeat_max -= repeat_min;            repeat_max -= repeat_min;
# Line 994  for (;; ptr++) Line 1088  for (;; ptr++)
1088        }        }
1089    
1090      /* If previous was a character class or a back reference, we put the repeat      /* If previous was a character class or a back reference, we put the repeat
1091      stuff after it. */      stuff after it, but just skip the item if the repeat was {0,0}. */
1092    
1093      else if (*previous == OP_CLASS || *previous == OP_REF)      else if (*previous == OP_CLASS || *previous == OP_REF)
1094        {        {
1095          if (repeat_max == 0)
1096            {
1097            code = previous;
1098            goto END_REPEAT;
1099            }
1100        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
1101          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
1102        else if (repeat_min == 1 && repeat_max == -1)        else if (repeat_min == 1 && repeat_max == -1)
# Line 1016  for (;; ptr++) Line 1115  for (;; ptr++)
1115        }        }
1116    
1117      /* If previous was a bracket group, we may have to replicate it in certain      /* If previous was a bracket group, we may have to replicate it in certain
1118      cases. If the maximum repeat count is unlimited, check that the bracket      cases. */
     group cannot match the empty string, and diagnose an error if it can. */  
1119    
1120      else if ((int)*previous >= OP_BRA)      else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE ||
1121                 (int)*previous == OP_COND)
1122        {        {
1123        int i;        register int i;
1124        int length = code - previous;        int ketoffset = 0;
1125          int len = code - previous;
1126          uschar *bralink = NULL;
1127    
1128          /* If the maximum repeat count is unlimited, find the end of the bracket
1129          by scanning through from the start, and compute the offset back to it
1130          from the current code pointer. There may be an OP_OPT setting following
1131          the final KET, so we can't find the end just by going back from the code
1132          pointer. */
1133    
1134          if (repeat_max == -1)
1135            {
1136            register uschar *ket = previous;
1137            do ket += (ket[1] << 8) + ket[2]; while (*ket != OP_KET);
1138            ketoffset = code - ket;
1139            }
1140    
1141          /* The case of a zero minimum is special because of the need to stick
1142          OP_BRAZERO in front of it, and because the group appears once in the
1143          data, whereas in other cases it appears the minimum number of times. For
1144          this reason, it is simplest to treat this case separately, as otherwise
1145          the code gets far too messy. There are several special subcases when the
1146          minimum is zero. */
1147    
1148        if (repeat_max == -1 && could_be_empty(previous))        if (repeat_min == 0)
1149          {          {
1150          *errorptr = ERR10;          /* If we set up a required char from the bracket, we must back off
1151          goto FAILED;          to the previous value and reset the countlits value too. */
         }  
1152    
1153        /* If the minimum is greater than zero, and the maximum is unlimited or          if (subcountlits > 0)
1154        equal to the minimum, the first copy remains where it is, and is            {
1155        replicated up to the minimum number of times. This case includes the +            *reqchar = prevreqchar;
1156        repeat, but of course no replication is needed in that case. */            *countlits -= subcountlits;
1157              }
1158    
1159        if (repeat_min > 0 && (repeat_max == -1 || repeat_max == repeat_min))          /* If the maximum is also zero, we just omit the group from the output
1160          {          altogether. */
1161          for (i = 1; i < repeat_min; i++)  
1162            if (repeat_max == 0)
1163            {            {
1164            memcpy(code, previous, length);            code = previous;
1165            code += length;            goto END_REPEAT;
1166            }            }
         }  
1167    
1168        /* If the minimum is zero, stick BRAZERO in front of the first copy.          /* If the maximum is 1 or unlimited, we just have to stick in the
1169        Then, if there is a fixed upper limit, replicated up to that many times,          BRAZERO and do no more at this point. */
       sticking BRAZERO in front of all the optional ones. */  
1170    
1171        else          if (repeat_max <= 1)
         {  
         if (repeat_min == 0)  
1172            {            {
1173            memmove(previous+1, previous, length);            memmove(previous+1, previous, len);
1174            code++;            code++;
1175            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
1176            }            }
1177    
1178            /* If the maximum is greater than 1 and limited, we have to replicate
1179            in a nested fashion, sticking OP_BRAZERO before each set of brackets.
1180            The first one has to be handled carefully because it's the original
1181            copy, which has to be moved up. The remainder can be handled by code
1182            that is common with the non-zero minimum case below. We just have to
1183            adjust the value of repeat_max, since one less copy is required. */
1184    
1185            else
1186              {
1187              int offset;
1188              memmove(previous+4, previous, len);
1189              code += 4;
1190              *previous++ = OP_BRAZERO + repeat_type;
1191              *previous++ = OP_BRA;
1192    
1193              /* We chain together the bracket offset fields that have to be
1194              filled in later when the ends of the brackets are reached. */
1195    
1196              offset = (bralink == NULL)? 0 : previous - bralink;
1197              bralink = previous;
1198              *previous++ = offset >> 8;
1199              *previous++ = offset & 255;
1200              }
1201    
1202            repeat_max--;
1203            }
1204    
1205          /* If the minimum is greater than zero, replicate the group as many
1206          times as necessary, and adjust the maximum to the number of subsequent
1207          copies that we need. */
1208    
1209          else
1210            {
1211          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1212            {            {
1213            memcpy(code, previous, length);            memcpy(code, previous, len);
1214            code += length;            code += len;
1215            }            }
1216            if (repeat_max > 0) repeat_max -= repeat_min;
1217            }
1218    
1219          /* This code is common to both the zero and non-zero minimum cases. If
1220          the maximum is limited, it replicates the group in a nested fashion,
1221          remembering the bracket starts on a stack. In the case of a zero minimum,
1222          the first one was set up above. In all cases the repeat_max now specifies
1223          the number of additional copies needed. */
1224    
1225          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)        if (repeat_max >= 0)
1226            {
1227            for (i = repeat_max - 1; i >= 0; i--)
1228            {            {
1229            *code++ = OP_BRAZERO + repeat_type;            *code++ = OP_BRAZERO + repeat_type;
1230            memcpy(code, previous, length);  
1231            code += length;            /* All but the final copy start a new nesting, maintaining the
1232              chain of brackets outstanding. */
1233    
1234              if (i != 0)
1235                {
1236                int offset;
1237                *code++ = OP_BRA;
1238                offset = (bralink == NULL)? 0 : code - bralink;
1239                bralink = code;
1240                *code++ = offset >> 8;
1241                *code++ = offset & 255;
1242                }
1243    
1244              memcpy(code, previous, len);
1245              code += len;
1246              }
1247    
1248            /* Now chain through the pending brackets, and fill in their length
1249            fields (which are holding the chain links pro tem). */
1250    
1251            while (bralink != NULL)
1252              {
1253              int oldlinkoffset;
1254              int offset = code - bralink + 1;
1255              uschar *bra = code - offset;
1256              oldlinkoffset = (bra[1] << 8) + bra[2];
1257              bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
1258              *code++ = OP_KET;
1259              *code++ = bra[1] = offset >> 8;
1260              *code++ = bra[2] = (offset & 255);
1261            }            }
1262          }          }
1263    
1264        /* If the maximum is unlimited, set a repeater in the final copy. */        /* If the maximum is unlimited, set a repeater in the final copy. We
1265          can't just offset backwards from the current code point, because we
1266          don't know if there's been an options resetting after the ket. The
1267          correct offset was computed above. */
1268    
1269        if (repeat_max == -1) code[-3] = OP_KETRMAX + repeat_type;        else code[-ketoffset] = OP_KETRMAX + repeat_type;
1270        }        }
1271    
1272      /* Else there's some kind of shambles */      /* Else there's some kind of shambles */
# Line 1086  for (;; ptr++) Line 1279  for (;; ptr++)
1279    
1280      /* In all cases we no longer have a previous item. */      /* In all cases we no longer have a previous item. */
1281    
1282        END_REPEAT:
1283      previous = NULL;      previous = NULL;
1284      break;      break;
1285    
1286    
1287      /* Start of nested bracket sub-expression, or comment or lookahead.      /* Start of nested bracket sub-expression, or comment or lookahead or
1288      First deal with special things that can come after a bracket; all are      lookbehind or option setting or condition. First deal with special things
1289      introduced by ?, and the appearance of any of them means that this is not a      that can come after a bracket; all are introduced by ?, and the appearance
1290      referencing group. They were checked for validity in the first pass over      of any of them means that this is not a referencing group. They were
1291      the string, so we don't have to check for syntax errors here.  */      checked for validity in the first pass over the string, so we don't have to
1292        check for syntax errors here.  */
1293    
1294      case '(':      case '(':
1295      previous = code;              /* Only real brackets can be repeated */      newoptions = options;
1296        condref = -1;
1297    
1298      if (*(++ptr) == '?')      if (*(++ptr) == '?')
1299        {        {
1300        bravalue = OP_BRA;        int set, unset;
1301          int *optset;
1302    
1303        switch (*(++ptr))        switch (*(++ptr))
1304          {          {
1305          case '#':          case '#':                 /* Comment; skip to ket */
         case 'i':  
         case 'm':  
         case 's':  
         case 'x':  
1306          ptr++;          ptr++;
1307          while (*ptr != ')') ptr++;          while (*ptr != ')') ptr++;
         previous = NULL;  
1308          continue;          continue;
1309    
1310          case ':':                 /* Non-extracting bracket */          case ':':                 /* Non-extracting bracket */
1311            bravalue = OP_BRA;
1312          ptr++;          ptr++;
1313          break;          break;
1314    
1315          case '=':                 /* Assertions can't be repeated */          case '(':
1316            bravalue = OP_COND;       /* Conditional group */
1317            if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0)
1318              {
1319              condref = *ptr - '0';
1320              while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';
1321              ptr++;
1322              }
1323            else ptr--;
1324            break;
1325    
1326            case '=':                 /* Positive lookahead */
1327          bravalue = OP_ASSERT;          bravalue = OP_ASSERT;
1328          ptr++;          ptr++;
         previous = NULL;  
1329          break;          break;
1330    
1331          case '!':          case '!':                 /* Negative lookahead */
1332          bravalue = OP_ASSERT_NOT;          bravalue = OP_ASSERT_NOT;
1333          ptr++;          ptr++;
         previous = NULL;  
1334          break;          break;
1335    
1336          case '>':                         /* "Match once" brackets */          case '<':                 /* Lookbehinds */
1337          if ((options & PCRE_EXTRA) != 0)  /* Not yet standard */          switch (*(++ptr))
1338            {            {
1339            bravalue = OP_ONCE;            case '=':               /* Positive lookbehind */
1340              bravalue = OP_ASSERTBACK;
1341              ptr++;
1342              break;
1343    
1344              case '!':               /* Negative lookbehind */
1345              bravalue = OP_ASSERTBACK_NOT;
1346            ptr++;            ptr++;
           previous = NULL;  
1347            break;            break;
1348    
1349              default:                /* Syntax error */
1350              *errorptr = ERR24;
1351              goto FAILED;
1352            }            }
1353          /* Else fall through */          break;
1354    
1355          default:          case '>':                 /* One-time brackets */
1356          *errorptr = ERR12;          bravalue = OP_ONCE;
1357          goto FAILED;          ptr++;
1358            break;
1359    
1360            default:                  /* Option setting */
1361            set = unset = 0;
1362            optset = &set;
1363    
1364            while (*ptr != ')' && *ptr != ':')
1365              {
1366              switch (*ptr++)
1367                {
1368                case '-': optset = &unset; break;
1369    
1370                case 'i': *optset |= PCRE_CASELESS; break;
1371                case 'm': *optset |= PCRE_MULTILINE; break;
1372                case 's': *optset |= PCRE_DOTALL; break;
1373                case 'x': *optset |= PCRE_EXTENDED; break;
1374                case 'U': *optset |= PCRE_UNGREEDY; break;
1375                case 'X': *optset |= PCRE_EXTRA; break;
1376    
1377                default:
1378                *errorptr = ERR12;
1379                goto FAILED;
1380                }
1381              }
1382    
1383            /* Set up the changed option bits, but don't change anything yet. */
1384    
1385            newoptions = (options | set) & (~unset);
1386    
1387            /* If the options ended with ')' this is not the start of a nested
1388            group with option changes, so the options change at this level. At top
1389            level there is nothing else to be done (the options will in fact have
1390            been set from the start of compiling as a result of the first pass) but
1391            at an inner level we must compile code to change the ims options if
1392            necessary, and pass the new setting back so that it can be put at the
1393            start of any following branches, and when this group ends, a resetting
1394            item can be compiled. */
1395    
1396            if (*ptr == ')')
1397              {
1398              if ((options & PCRE_INGROUP) != 0 &&
1399                  (options & PCRE_IMS) != (newoptions & PCRE_IMS))
1400                {
1401                *code++ = OP_OPT;
1402                *code++ = *optchanged = newoptions & PCRE_IMS;
1403                }
1404              options = newoptions;  /* Change options at this level */
1405              previous = NULL;       /* This item can't be repeated */
1406              continue;              /* It is complete */
1407              }
1408    
1409            /* If the options ended with ':' we are heading into a nested group
1410            with possible change of options. Such groups are non-capturing and are
1411            not assertions of any kind. All we need to do is skip over the ':';
1412            the newoptions value is handled below. */
1413    
1414            bravalue = OP_BRA;
1415            ptr++;
1416          }          }
1417        }        }
1418    
1419      /* Else we have a referencing group */      /* Else we have a referencing group; adjust the opcode. */
1420    
1421      else      else
1422        {        {
# Line 1158  for (;; ptr++) Line 1428  for (;; ptr++)
1428        bravalue = OP_BRA + *brackets;        bravalue = OP_BRA + *brackets;
1429        }        }
1430    
1431      /* Process nested bracketed re; at end pointer is on the bracket. We copy      /* Process nested bracketed re. Assertions may not be repeated, but other
1432      code into a non-register variable in order to be able to pass its address      kinds can be. We copy code into a non-register variable in order to be able
1433      because some compilers complain otherwise. */      to pass its address because some compilers complain otherwise. Pass in a
1434        new setting for the ims options if they have changed. */
1435    
1436        previous = (bravalue >= OP_ONCE)? code : NULL;
1437      *code = bravalue;      *code = bravalue;
1438        tempcode = code;
1439    
1440        if (!compile_regex(
1441             options | PCRE_INGROUP,       /* Set for all nested groups */
1442             ((options & PCRE_IMS) != (newoptions & PCRE_IMS))?
1443               newoptions & PCRE_IMS : -1, /* Pass ims options if changed */
1444             brackets,                     /* Bracket level */
1445             &tempcode,                    /* Where to put code (updated) */
1446             &ptr,                         /* Input pointer (updated) */
1447             errorptr,                     /* Where to put an error message */
1448             (bravalue == OP_ASSERTBACK ||
1449              bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
1450             condref,                      /* Condition reference number */
1451             &subreqchar,                  /* For possible last char */
1452             &subcountlits,                /* For literal count */
1453             cd))                          /* Tables block */
1454          goto FAILED;
1455    
1456        /* At the end of compiling, code is still pointing to the start of the
1457        group, while tempcode has been updated to point past the end of the group
1458        and any option resetting that may follow it. The pattern pointer (ptr)
1459        is on the bracket. */
1460    
1461        /* If this is a conditional bracket, check that there are no more than
1462        two branches in the group. */
1463    
1464        if (bravalue == OP_COND)
1465        {        {
1466        uschar *mcode = code;        uschar *tc = code;
1467        if (!compile_regex(options, brackets, &mcode, &ptr, errorptr))        condcount = 0;
1468    
1469          do {
1470             condcount++;
1471             tc += (tc[1] << 8) | tc[2];
1472             }
1473          while (*tc != OP_KET);
1474    
1475          if (condcount > 2)
1476            {
1477            *errorptr = ERR27;
1478          goto FAILED;          goto FAILED;
1479        code = mcode;          }
1480          }
1481    
1482        /* Handle updating of the required character. If the subpattern didn't
1483        set one, leave it as it was. Otherwise, update it for normal brackets of
1484        all kinds, forward assertions, and conditions with two branches. Don't
1485        update the literal count for forward assertions, however. If the bracket
1486        is followed by a quantifier with zero repeat, we have to back off. Hence
1487        the definition of prevreqchar and subcountlits outside the main loop so
1488        that they can be accessed for the back off. */
1489    
1490        if (subreqchar > 0 &&
1491             (bravalue >= OP_BRA || bravalue == OP_ONCE || bravalue == OP_ASSERT ||
1492             (bravalue == OP_COND && condcount == 2)))
1493          {
1494          prevreqchar = *reqchar;
1495          *reqchar = subreqchar;
1496          if (bravalue != OP_ASSERT) *countlits += subcountlits;
1497        }        }
1498    
1499        /* Now update the main code pointer to the end of the group. */
1500    
1501        code = tempcode;
1502    
1503        /* Error if hit end of pattern */
1504    
1505      if (*ptr != ')')      if (*ptr != ')')
1506        {        {
1507        *errorptr = ERR14;        *errorptr = ERR14;
# Line 1182  for (;; ptr++) Line 1514  for (;; ptr++)
1514      for validity in the pre-compiling pass. */      for validity in the pre-compiling pass. */
1515    
1516      case '\\':      case '\\':
1517      oldptr = ptr;      tempptr = ptr;
1518      c = check_escape(&ptr, errorptr, *brackets, options, FALSE);      c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1519    
1520      /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values      /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values
1521      are arranged to be the negation of the corresponding OP_values. For the      are arranged to be the negation of the corresponding OP_values. For the
# Line 1196  for (;; ptr++) Line 1528  for (;; ptr++)
1528        {        {
1529        if (-c >= ESC_REF)        if (-c >= ESC_REF)
1530          {          {
         int refnum = -c - ESC_REF;  
         if (*brackets < refnum)  
           {  
           *errorptr = ERR15;  
           goto FAILED;  
           }  
1531          previous = code;          previous = code;
1532          *code++ = OP_REF;          *code++ = OP_REF;
1533          *code++ = refnum;          *code++ = -c - ESC_REF;
1534          }          }
1535        else        else
1536          {          {
1537          previous = (-c > ESC_b && -c < ESC_X)? code : NULL;          previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
1538          *code++ = -c;          *code++ = -c;
1539          }          }
1540        continue;        continue;
1541        }        }
1542    
1543      /* Reset and fall through */      /* Data character: reset and fall through */
1544    
1545      ptr = oldptr;      ptr = tempptr;
1546      c = '\\';      c = '\\';
1547    
1548      /* Handle a run of data characters until a metacharacter is encountered.      /* Handle a run of data characters until a metacharacter is encountered.
# Line 1234  for (;; ptr++) Line 1560  for (;; ptr++)
1560        {        {
1561        if ((options & PCRE_EXTENDED) != 0)        if ((options & PCRE_EXTENDED) != 0)
1562          {          {
1563          if ((pcre_ctypes[c] & ctype_space) != 0) continue;          if ((cd->ctypes[c] & ctype_space) != 0) continue;
1564          if (c == '#')          if (c == '#')
1565            {            {
1566            while ((c = *(++ptr)) != 0 && c != '\n');            while ((c = *(++ptr)) != 0 && c != '\n');
# Line 1249  for (;; ptr++) Line 1575  for (;; ptr++)
1575    
1576        if (c == '\\')        if (c == '\\')
1577          {          {
1578          oldptr = ptr;          tempptr = ptr;
1579          c = check_escape(&ptr, errorptr, *brackets, options, FALSE);          c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1580          if (c < 0) { ptr = oldptr; break; }          if (c < 0) { ptr = tempptr; break; }
1581          }          }
1582    
1583        /* Ordinary character or single-char escape */        /* Ordinary character or single-char escape */
# Line 1262  for (;; ptr++) Line 1588  for (;; ptr++)
1588    
1589      /* This "while" is the end of the "do" above. */      /* This "while" is the end of the "do" above. */
1590    
1591      while (length < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);      while (length < 255 && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0);
1592    
1593        /* Update the last character and the count of literals */
1594    
1595        prevreqchar = (length > 1)? code[-2] : *reqchar;
1596        *reqchar = code[-1];
1597        *countlits += length;
1598    
1599      /* Compute the length and set it in the data vector, and advance to      /* Compute the length and set it in the data vector, and advance to
1600      the next state. */      the next state. */
1601    
1602      previous[1] = length;      previous[1] = length;
1603      ptr--;      if (length < 255) ptr--;
1604      break;      break;
1605      }      }
1606    }                   /* end of big loop */    }                   /* end of big loop */
# Line 1292  return FALSE; Line 1624  return FALSE;
1624  /* On entry, ptr is pointing past the bracket character, but on return  /* On entry, ptr is pointing past the bracket character, but on return
1625  it points to the closing bracket, or vertical bar, or end of string.  it points to the closing bracket, or vertical bar, or end of string.
1626  The code variable is pointing at the byte into which the BRA operator has been  The code variable is pointing at the byte into which the BRA operator has been
1627  stored.  stored. If the ims options are changed at the start (for a (?ims: group) or
1628    during any branch, we need to insert an OP_OPT item at the start of every
1629    following branch to ensure they get set correctly at run time, and also pass
1630    the new options into every subsequent branch compile.
1631    
1632  Argument:  Argument:
1633    options   the option bits    options     the option bits
1634    brackets  -> int containing the number of extracting brackets used    optchanged  new ims options to set as if (?ims) were at the start, or -1
1635    codeptr   -> the address of the current code pointer                 for no change
1636    ptrptr    -> the address of the current pattern pointer    brackets    -> int containing the number of extracting brackets used
1637    errorptr  -> pointer to error message    codeptr     -> the address of the current code pointer
1638      ptrptr      -> the address of the current pattern pointer
1639      errorptr    -> pointer to error message
1640      lookbehind  TRUE if this is a lookbehind assertion
1641      condref     > 0 for OPT_CREF setting at start of conditional group
1642      reqchar     -> place to put the last required character, or a negative number
1643      countlits   -> place to put the shortest literal count of any branch
1644      cd          points to the data block with tables pointers
1645    
1646  Returns:    TRUE on success  Returns:      TRUE on success
1647  */  */
1648    
1649  static BOOL  static BOOL
1650  compile_regex(int options, int *brackets, uschar **codeptr, uschar **ptrptr,  compile_regex(int options, int optchanged, int *brackets, uschar **codeptr,
1651    char **errorptr)    const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref,
1652      int *reqchar, int *countlits, compile_data *cd)
1653  {  {
1654  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
1655  uschar *code = *codeptr;  uschar *code = *codeptr;
1656    uschar *last_branch = code;
1657  uschar *start_bracket = code;  uschar *start_bracket = code;
1658    uschar *reverse_count = NULL;
1659    int oldoptions = options & PCRE_IMS;
1660    int branchreqchar, branchcountlits;
1661    
1662    *reqchar = -1;
1663    *countlits = INT_MAX;
1664    code += 3;
1665    
1666    /* At the start of a reference-based conditional group, insert the reference
1667    number as an OP_CREF item. */
1668    
1669    if (condref > 0)
1670      {
1671      *code++ = OP_CREF;
1672      *code++ = condref;
1673      }
1674    
1675    /* Loop for each alternative branch */
1676    
1677  for (;;)  for (;;)
1678    {    {
1679    int length;    int length;
   uschar *last_branch = code;  
1680    
1681    code += 3;    /* Handle change of options */
1682    if (!compile_branch(options, brackets, &code, &ptr, errorptr))  
1683      if (optchanged >= 0)
1684        {
1685        *code++ = OP_OPT;
1686        *code++ = optchanged;
1687        options = (options & ~PCRE_IMS) | optchanged;
1688        }
1689    
1690      /* Set up dummy OP_REVERSE if lookbehind assertion */
1691    
1692      if (lookbehind)
1693        {
1694        *code++ = OP_REVERSE;
1695        reverse_count = code;
1696        *code++ = 0;
1697        *code++ = 0;
1698        }
1699    
1700      /* Now compile the branch */
1701    
1702      if (!compile_branch(options, brackets, &code, &ptr, errorptr, &optchanged,
1703          &branchreqchar, &branchcountlits, cd))
1704      {      {
1705      *ptrptr = ptr;      *ptrptr = ptr;
1706      return FALSE;      return FALSE;
# Line 1330  for (;;) Line 1712  for (;;)
1712    last_branch[1] = length >> 8;    last_branch[1] = length >> 8;
1713    last_branch[2] = length & 255;    last_branch[2] = length & 255;
1714    
1715      /* Save the last required character if all branches have the same; a current
1716      value of -1 means unset, while -2 means "previous branch had no last required
1717      char".  */
1718    
1719      if (*reqchar != -2)
1720        {
1721        if (branchreqchar >= 0)
1722          {
1723          if (*reqchar == -1) *reqchar = branchreqchar;
1724          else if (*reqchar != branchreqchar) *reqchar = -2;
1725          }
1726        else *reqchar = -2;
1727        }
1728    
1729      /* Keep the shortest literal count */
1730    
1731      if (branchcountlits < *countlits) *countlits = branchcountlits;
1732      DPRINTF(("literal count = %d min=%d\n", branchcountlits, *countlits));
1733    
1734      /* If lookbehind, check that this branch matches a fixed-length string,
1735      and put the length into the OP_REVERSE item. Temporarily mark the end of
1736      the branch with OP_END. */
1737    
1738      if (lookbehind)
1739        {
1740        *code = OP_END;
1741        length = find_fixedlength(last_branch);
1742        DPRINTF(("fixed length = %d\n", length));
1743        if (length < 0)
1744          {
1745          *errorptr = ERR25;
1746          *ptrptr = ptr;
1747          return FALSE;
1748          }
1749        reverse_count[0] = (length >> 8);
1750        reverse_count[1] = length & 255;
1751        }
1752    
1753    /* Reached end of expression, either ')' or end of pattern. Insert a    /* Reached end of expression, either ')' or end of pattern. Insert a
1754    terminating ket and the length of the whole bracketed item, and return,    terminating ket and the length of the whole bracketed item, and return,
1755    leaving the pointer at the terminating char. */    leaving the pointer at the terminating char. If any of the ims options
1756      were changed inside the group, compile a resetting op-code following. */
1757    
1758    if (*ptr != '|')    if (*ptr != '|')
1759      {      {
# Line 1340  for (;;) Line 1761  for (;;)
1761      *code++ = OP_KET;      *code++ = OP_KET;
1762      *code++ = length >> 8;      *code++ = length >> 8;
1763      *code++ = length & 255;      *code++ = length & 255;
1764        if (optchanged >= 0)
1765          {
1766          *code++ = OP_OPT;
1767          *code++ = oldoptions;
1768          }
1769      *codeptr = code;      *codeptr = code;
1770      *ptrptr = ptr;      *ptrptr = ptr;
1771      return TRUE;      return TRUE;
# Line 1348  for (;;) Line 1774  for (;;)
1774    /* Another branch follows; insert an "or" node and advance the pointer. */    /* Another branch follows; insert an "or" node and advance the pointer. */
1775    
1776    *code = OP_ALT;    *code = OP_ALT;
1777      last_branch = code;
1778      code += 3;
1779    ptr++;    ptr++;
1780    }    }
1781  /* Control never reaches here */  /* Control never reaches here */
# Line 1355  for (;;) Line 1783  for (;;)
1783    
1784    
1785    
1786    
1787    /*************************************************
1788    *      Find first significant op code            *
1789    *************************************************/
1790    
1791    /* This is called by several functions that scan a compiled expression looking
1792    for a fixed first character, or an anchoring op code etc. It skips over things
1793    that do not influence this. For one application, a change of caseless option is
1794    important.
1795    
1796    Arguments:
1797      code       pointer to the start of the group
1798      options    pointer to external options
1799      optbit     the option bit whose changing is significant, or
1800                 zero if none are
1801      optstop    TRUE to return on option change, otherwise change the options
1802                   value and continue
1803    
1804    Returns:     pointer to the first significant opcode
1805    */
1806    
1807    static const uschar*
1808    first_significant_code(const uschar *code, int *options, int optbit,
1809      BOOL optstop)
1810    {
1811    for (;;)
1812      {
1813      switch ((int)*code)
1814        {
1815        case OP_OPT:
1816        if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit))
1817          {
1818          if (optstop) return code;
1819          *options = (int)code[1];
1820          }
1821        code += 2;
1822        break;
1823    
1824        case OP_CREF:
1825        code += 2;
1826        break;
1827    
1828        case OP_WORD_BOUNDARY:
1829        case OP_NOT_WORD_BOUNDARY:
1830        code++;
1831        break;
1832    
1833        case OP_ASSERT_NOT:
1834        case OP_ASSERTBACK:
1835        case OP_ASSERTBACK_NOT:
1836        do code += (code[1] << 8) + code[2]; while (*code == OP_ALT);
1837        code += 3;
1838        break;
1839    
1840        default:
1841        return code;
1842        }
1843      }
1844    /* Control never reaches here */
1845    }
1846    
1847    
1848    
1849    
1850  /*************************************************  /*************************************************
1851  *          Check for anchored expression         *  *          Check for anchored expression         *
1852  *************************************************/  *************************************************/
# Line 1365  all of whose alternatives start with OP_ Line 1857  all of whose alternatives start with OP_
1857  it's anchored. However, if this is a multiline pattern, then only OP_SOD  it's anchored. However, if this is a multiline pattern, then only OP_SOD
1858  counts, since OP_CIRC can match in the middle.  counts, since OP_CIRC can match in the middle.
1859    
1860  A branch is also implicitly anchored if it starts with .* because that will try  A branch is also implicitly anchored if it starts with .* and DOTALL is set,
1861  the rest of the pattern at all possible matching points, so there is no point  because that will try the rest of the pattern at all possible matching points,
1862  trying them again.  so there is no point trying them again.
1863    
1864  Argument:  points to start of expression (the bracket)  Arguments:
1865  Returns:   TRUE or FALSE    code       points to start of expression (the bracket)
1866      options    points to the options setting
1867    
1868    Returns:     TRUE or FALSE
1869  */  */
1870    
1871  static BOOL  static BOOL
1872  is_anchored(register uschar *code, BOOL multiline)  is_anchored(register const uschar *code, int *options)
1873  {  {
1874  do {  do {
1875     int op = (int)code[3];     const uschar *scode = first_significant_code(code + 3, options,
1876     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE)       PCRE_MULTILINE, FALSE);
1877       { if (!is_anchored(code+3, multiline)) return FALSE; }     register int op = *scode;
1878     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
1879       { if (code[4] != OP_ANY) return FALSE; }       { if (!is_anchored(scode, options)) return FALSE; }
1880     else if (op != OP_SOD && (multiline || op != OP_CIRC)) return FALSE;     else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) &&
1881                (*options & PCRE_DOTALL) != 0)
1882         { if (scode[1] != OP_ANY) return FALSE; }
1883       else if (op != OP_SOD &&
1884               ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))
1885         return FALSE;
1886     code += (code[1] << 8) + code[2];     code += (code[1] << 8) + code[2];
1887     }     }
1888  while (*code == OP_ALT);  while (*code == OP_ALT);
# Line 1392  return TRUE; Line 1892  return TRUE;
1892    
1893    
1894  /*************************************************  /*************************************************
1895  *     Check for start with \n line expression    *  *         Check for starting with ^ or .*        *
1896  *************************************************/  *************************************************/
1897    
1898  /* This is called for multiline expressions to try to find out if every branch  /* This is called to find out if every branch starts with ^ or .* so that
1899  starts with ^ so that "first char" processing can be done to speed things up.  "first char" processing can be done to speed things up in multiline
1900    matching and for non-DOTALL patterns that start with .* (which must start at
1901    the beginning or after \n).
1902    
1903  Argument:  points to start of expression (the bracket)  Argument:  points to start of expression (the bracket)
1904  Returns:   TRUE or FALSE  Returns:   TRUE or FALSE
1905  */  */
1906    
1907  static BOOL  static BOOL
1908  is_startline(uschar *code)  is_startline(const uschar *code)
1909  {  {
1910  do {  do {
1911     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)     const uschar *scode = first_significant_code(code + 3, NULL, 0, FALSE);
1912       { if (!is_startline(code+3)) return FALSE; }     register int op = *scode;
1913     else if (code[3] != OP_CIRC) return FALSE;     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
1914         { if (!is_startline(scode)) return FALSE; }
1915       else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)
1916         { if (scode[1] != OP_ANY) return FALSE; }
1917       else if (op != OP_CIRC) return FALSE;
1918     code += (code[1] << 8) + code[2];     code += (code[1] << 8) + code[2];
1919     }     }
1920  while (*code == OP_ALT);  while (*code == OP_ALT);
# Line 1427  Consider each alternative branch. If the Line 1933  Consider each alternative branch. If the
1933  a bracket all of whose alternatives start with the same char (recurse ad lib),  a bracket all of whose alternatives start with the same char (recurse ad lib),
1934  then we return that char, otherwise -1.  then we return that char, otherwise -1.
1935    
1936  Argument:  points to start of expression (the bracket)  Arguments:
1937  Returns:   -1 or the fixed first char    code       points to start of expression (the bracket)
1938      options    pointer to the options (used to check casing changes)
1939    
1940    Returns:     -1 or the fixed first char
1941  */  */
1942    
1943  static int  static int
1944  find_firstchar(uschar *code)  find_firstchar(const uschar *code, int *options)
1945  {  {
1946  register int c = -1;  register int c = -1;
1947  do  do {
1948    {     int d;
1949    register int charoffset = 4;     const uschar *scode = first_significant_code(code + 3, options,
1950         PCRE_CASELESS, TRUE);
1951    if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)     register int op = *scode;
1952      {  
1953      register int d;     if (op >= OP_BRA) op = OP_BRA;
1954      if ((d = find_firstchar(code+3)) < 0) return -1;  
1955      if (c < 0) c = d; else if (c != d) return -1;     switch(op)
1956      }       {
1957         default:
1958    else switch(code[3])       return -1;
1959      {  
1960      default:       case OP_BRA:
1961      return -1;       case OP_ASSERT:
1962         case OP_ONCE:
1963      case OP_EXACT:       /* Fall through */       case OP_COND:
1964      charoffset++;       if ((d = find_firstchar(scode, options)) < 0) return -1;
1965         if (c < 0) c = d; else if (c != d) return -1;
1966      case OP_CHARS:       /* Fall through */       break;
1967      charoffset++;  
1968         case OP_EXACT:       /* Fall through */
1969         scode++;
1970    
1971         case OP_CHARS:       /* Fall through */
1972         scode++;
1973    
1974         case OP_PLUS:
1975         case OP_MINPLUS:
1976         if (c < 0) c = scode[1]; else if (c != scode[1]) return -1;
1977         break;
1978         }
1979    
1980      case OP_PLUS:     code += (code[1] << 8) + code[2];
1981      case OP_MINPLUS:     }
     if (c < 0) c = code[charoffset]; else if (c != code[charoffset]) return -1;  
     break;  
     }  
   code += (code[1] << 8) + code[2];  
   }  
1982  while (*code == OP_ALT);  while (*code == OP_ALT);
1983  return c;  return c;
1984  }  }
1985    
1986    
1987    
1988    
1989    
1990  /*************************************************  /*************************************************
1991  *        Compile a Regular Expression            *  *        Compile a Regular Expression            *
1992  *************************************************/  *************************************************/
# Line 1482  Arguments: Line 1999  Arguments:
1999    options      various option bits    options      various option bits
2000    errorptr     pointer to pointer to error text    errorptr     pointer to pointer to error text
2001    erroroffset  ptr offset in pattern where error was detected    erroroffset  ptr offset in pattern where error was detected
2002      tables       pointer to character tables or NULL
2003    
2004  Returns:       pointer to compiled data block, or NULL on error,  Returns:       pointer to compiled data block, or NULL on error,
2005                 with errorptr and erroroffset set                 with errorptr and erroroffset set
2006  */  */
2007    
2008  pcre *  pcre *
2009  pcre_compile(const char *pattern, int options, char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
2010    int *erroroffset)    int *erroroffset, const unsigned char *tables)
2011  {  {
2012  real_pcre *re;  real_pcre *re;
 int spaces = 0;  
2013  int length = 3;      /* For initial BRA plus length */  int length = 3;      /* For initial BRA plus length */
2014  int runlength;  int runlength;
2015  int c, size;  int c, size, reqchar, countlits;
2016  int bracount = 0;  int bracount = 0;
 int brastack[200];  
 int brastackptr = 0;  
2017  int top_backref = 0;  int top_backref = 0;
2018  uschar *code, *ptr;  int branch_extra = 0;
2019    int branch_newextra;
2020    unsigned int brastackptr = 0;
2021    uschar *code;
2022    const uschar *ptr;
2023    compile_data compile_block;
2024    int brastack[BRASTACK_SIZE];
2025    uschar bralenstack[BRASTACK_SIZE];
2026    
2027  #ifdef DEBUG  #ifdef DEBUG
2028  uschar *code_base, *code_end;  uschar *code_base, *code_end;
# Line 1527  if ((options & ~PUBLIC_OPTIONS) != 0) Line 2049  if ((options & ~PUBLIC_OPTIONS) != 0)
2049    return NULL;    return NULL;
2050    }    }
2051    
2052  #ifdef DEBUG  /* Set up pointers to the individual character tables */
2053  printf("------------------------------------------------------------------\n");  
2054  printf("%s\n", pattern);  if (tables == NULL) tables = pcre_default_tables;
2055  #endif  compile_block.lcc = tables + lcc_offset;
2056    compile_block.fcc = tables + fcc_offset;
2057    compile_block.cbits = tables + cbits_offset;
2058    compile_block.ctypes = tables + ctypes_offset;
2059    
2060    /* Reflect pattern for debugging output */
2061    
2062    DPRINTF(("------------------------------------------------------------------\n"));
2063    DPRINTF(("%s\n", pattern));
2064    
2065  /* The first thing to do is to make a pass over the pattern to compute the  /* The first thing to do is to make a pass over the pattern to compute the
2066  amount of store required to hold the compiled code. This does not have to be  amount of store required to hold the compiled code. This does not have to be
# Line 1539  internal flag settings. Make an attempt Line 2069  internal flag settings. Make an attempt
2069  if an "extended" flag setting appears late in the pattern. We can't be so  if an "extended" flag setting appears late in the pattern. We can't be so
2070  clever for #-comments. */  clever for #-comments. */
2071    
2072  ptr = (uschar *)(pattern - 1);  ptr = (const uschar *)(pattern - 1);
2073  while ((c = *(++ptr)) != 0)  while ((c = *(++ptr)) != 0)
2074    {    {
2075    int min, max;    int min, max;
2076    int class_charcount;    int class_charcount;
2077    
2078    if ((pcre_ctypes[c] & ctype_space) != 0)    if ((options & PCRE_EXTENDED) != 0)
     {  
     if ((options & PCRE_EXTENDED) != 0) continue;  
     spaces++;  
     }  
   
   if (c == '#' && (options & PCRE_EXTENDED) != 0)  
2079      {      {
2080      while ((c = *(++ptr)) != 0 && c != '\n');      if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2081      continue;      if (c == '#')
2082          {
2083          while ((c = *(++ptr)) != 0 && c != '\n');
2084          continue;
2085          }
2086      }      }
2087    
2088    switch(c)    switch(c)
# Line 1566  while ((c = *(++ptr)) != 0) Line 2094  while ((c = *(++ptr)) != 0)
2094    
2095      case '\\':      case '\\':
2096        {        {
2097        uschar *save_ptr = ptr;        const uschar *save_ptr = ptr;
2098        c = check_escape(&ptr, errorptr, bracount, options, FALSE);        c = check_escape(&ptr, errorptr, bracount, options, FALSE, &compile_block);
2099        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2100        if (c >= 0)        if (c >= 0)
2101          {          {
# Line 1587  while ((c = *(++ptr)) != 0) Line 2115  while ((c = *(++ptr)) != 0)
2115        int refnum = -c - ESC_REF;        int refnum = -c - ESC_REF;
2116        if (refnum > top_backref) top_backref = refnum;        if (refnum > top_backref) top_backref = refnum;
2117        length++;   /* For single back reference */        length++;   /* For single back reference */
2118        if (ptr[1] == '{' && is_counted_repeat(ptr+2))        if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2119          {          {
2120          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
2121          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2122          if ((min == 0 && (max == 1 || max == -1)) ||          if ((min == 0 && (max == 1 || max == -1)) ||
2123            (min == 1 && max == -1))            (min == 1 && max == -1))
# Line 1613  while ((c = *(++ptr)) != 0) Line 2141  while ((c = *(++ptr)) != 0)
2141      or back reference. */      or back reference. */
2142    
2143      case '{':      case '{':
2144      if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;      if (!is_counted_repeat(ptr+1, &compile_block)) goto NORMAL_CHAR;
2145      ptr = read_repeat_counts(ptr+1, &min, &max, errorptr);      ptr = read_repeat_counts(ptr+1, &min, &max, errorptr, &compile_block);
2146      if (*errorptr != NULL) goto PCRE_ERROR_RETURN;      if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2147      if ((min == 0 && (max == 1 || max == -1)) ||      if ((min == 0 && (max == 1 || max == -1)) ||
2148        (min == 1 && max == -1))        (min == 1 && max == -1))
# Line 1628  while ((c = *(++ptr)) != 0) Line 2156  while ((c = *(++ptr)) != 0)
2156      if (ptr[1] == '?') ptr++;      if (ptr[1] == '?') ptr++;
2157      continue;      continue;
2158    
2159      /* An alternation contains an offset to the next branch or ket. */      /* An alternation contains an offset to the next branch or ket. If any ims
2160        options changed in the previous branch(es), and/or if we are in a
2161        lookbehind assertion, extra space will be needed at the start of the
2162        branch. This is handled by branch_extra. */
2163    
2164      case '|':      case '|':
2165      length += 3;      length += 3 + branch_extra;
2166      continue;      continue;
2167    
2168      /* A character class uses 33 characters. Don't worry about character types      /* A character class uses 33 characters. Don't worry about character types
# Line 1645  while ((c = *(++ptr)) != 0) Line 2177  while ((c = *(++ptr)) != 0)
2177        {        {
2178        if (*ptr == '\\')        if (*ptr == '\\')
2179          {          {
2180          int c = check_escape(&ptr, errorptr, bracount, options, TRUE);          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE,
2181              &compile_block);
2182          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2183          if (-c == ESC_b) class_charcount++; else class_charcount = 10;          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
2184          }          }
2185        else class_charcount++;        else class_charcount++;
2186        ptr++;        ptr++;
# Line 1662  while ((c = *(++ptr)) != 0) Line 2195  while ((c = *(++ptr)) != 0)
2195    
2196        /* A repeat needs either 1 or 5 bytes. */        /* A repeat needs either 1 or 5 bytes. */
2197    
2198        if (ptr[1] == '{' && is_counted_repeat(ptr+2))        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2199          {          {
2200          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
2201          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2202          if ((min == 0 && (max == 1 || max == -1)) ||          if ((min == 0 && (max == 1 || max == -1)) ||
2203            (min == 1 && max == -1))            (min == 1 && max == -1))
# Line 1678  while ((c = *(++ptr)) != 0) Line 2211  while ((c = *(++ptr)) != 0)
2211      /* Brackets may be genuine groups or special things */      /* Brackets may be genuine groups or special things */
2212    
2213      case '(':      case '(':
2214        branch_newextra = 0;
2215    
2216      /* Handle special forms of bracket, which all start (? */      /* Handle special forms of bracket, which all start (? */
2217    
2218      if (ptr[1] == '?') switch (c = ptr[2])      if (ptr[1] == '?')
2219        {        {
2220        /* Skip over comments entirely */        int set, unset;
2221        case '#':        int *optset;
       ptr += 3;  
       while (*ptr != 0 && *ptr != ')') ptr++;  
       if (*ptr == 0)  
         {  
         *errorptr = ERR18;  
         goto PCRE_ERROR_RETURN;  
         }  
       continue;  
2222    
2223        /* Non-referencing groups and lookaheads just move the pointer on, and        switch (c = ptr[2])
2224        then behave like a non-special bracket, except that they don't increment          {
2225        the count of extracting brackets. */          /* Skip over comments entirely */
2226            case '#':
2227        case ':':          ptr += 3;
2228        case '=':          while (*ptr != 0 && *ptr != ')') ptr++;
2229        case '!':          if (*ptr == 0)
2230        ptr += 2;            {
2231        break;            *errorptr = ERR18;
2232              goto PCRE_ERROR_RETURN;
2233              }
2234            continue;
2235    
2236        /* Ditto for the "once only" bracket, allowed only if the extra bit          /* Non-referencing groups and lookaheads just move the pointer on, and
2237        is set. */          then behave like a non-special bracket, except that they don't increment
2238            the count of extracting brackets. Ditto for the "once only" bracket,
2239            which is in Perl from version 5.005. */
2240    
2241        case '>':          case ':':
2242        if ((options & PCRE_EXTRA) != 0)          case '=':
2243          {          case '!':
2244            case '>':
2245          ptr += 2;          ptr += 2;
2246          break;          break;
         }  
       /* Else fall thourh */  
2247    
2248        /* Else loop setting valid options until ) is met. Anything else is an          /* Lookbehinds are in Perl from version 5.005 */
       error. */  
2249    
2250        default:          case '<':
2251        ptr += 2;          if (ptr[3] == '=' || ptr[3] == '!')
       for (;; ptr++)  
         {  
         if ((c = *ptr) == 'i')  
2252            {            {
2253            options |= PCRE_CASELESS;            ptr += 3;
2254            continue;            branch_newextra = 3;
2255              length += 3;         /* For the first branch */
2256              break;
2257              }
2258            *errorptr = ERR24;
2259            goto PCRE_ERROR_RETURN;
2260    
2261            /* Conditionals are in Perl from version 5.005. The bracket must either
2262            be followed by a number (for bracket reference) or by an assertion
2263            group. */
2264    
2265            case '(':
2266            if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0)
2267              {
2268              ptr += 4;
2269              length += 2;
2270              while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++;
2271              if (*ptr != ')')
2272                {
2273                *errorptr = ERR26;
2274                goto PCRE_ERROR_RETURN;
2275                }
2276            }            }
2277          else if ((c = *ptr) == 'm')          else   /* An assertion must follow */
2278            {            {
2279            options |= PCRE_MULTILINE;            ptr++;   /* Can treat like ':' as far as spacing is concerned */
2280            continue;  
2281              if (ptr[2] != '?' || strchr("=!<", ptr[3]) == NULL)
2282                {
2283                ptr += 2;    /* To get right offset in message */
2284                *errorptr = ERR28;
2285                goto PCRE_ERROR_RETURN;
2286                }
2287            }            }
2288          else if (c == 's')          break;
2289    
2290            /* Else loop checking valid options until ) is met. Anything else is an
2291            error. If we are without any brackets, i.e. at top level, the settings
2292            act as if specified in the options, so massage the options immediately.
2293            This is for backward compatibility with Perl 5.004. */
2294    
2295            default:
2296            set = unset = 0;
2297            optset = &set;
2298            ptr += 2;
2299    
2300            for (;; ptr++)
2301            {            {
2302            options |= PCRE_DOTALL;            c = *ptr;
2303            continue;            switch (c)
2304                {
2305                case 'i':
2306                *optset |= PCRE_CASELESS;
2307                continue;
2308    
2309                case 'm':
2310                *optset |= PCRE_MULTILINE;
2311                continue;
2312    
2313                case 's':
2314                *optset |= PCRE_DOTALL;
2315                continue;
2316    
2317                case 'x':
2318                *optset |= PCRE_EXTENDED;
2319                continue;
2320    
2321                case 'X':
2322                *optset |= PCRE_EXTRA;
2323                continue;
2324    
2325                case 'U':
2326                *optset |= PCRE_UNGREEDY;
2327                continue;
2328    
2329                case '-':
2330                optset = &unset;
2331                continue;
2332    
2333                /* A termination by ')' indicates an options-setting-only item;
2334                this is global at top level; otherwise nothing is done here and
2335                it is handled during the compiling process on a per-bracket-group
2336                basis. */
2337    
2338                case ')':
2339                if (brastackptr == 0)
2340                  {
2341                  options = (options | set) & (~unset);
2342                  set = unset = 0;     /* To save length */
2343                  }
2344                /* Fall through */
2345    
2346                /* A termination by ':' indicates the start of a nested group with
2347                the given options set. This is again handled at compile time, but
2348                we must allow for compiled space if any of the ims options are
2349                set. We also have to allow for resetting space at the end of
2350                the group, which is why 4 is added to the length and not just 2.
2351                If there are several changes of options within the same group, this
2352                will lead to an over-estimate on the length, but this shouldn't
2353                matter very much. We also have to allow for resetting options at
2354                the start of any alternations, which we do by setting
2355                branch_newextra to 2. Finally, we record whether the case-dependent
2356                flag ever changes within the regex. This is used by the "required
2357                character" code. */
2358    
2359                case ':':
2360                if (((set|unset) & PCRE_IMS) != 0)
2361                  {
2362                  length += 4;
2363                  branch_newextra = 2;
2364                  if (((set|unset) & PCRE_CASELESS) != 0) options |= PCRE_ICHANGED;
2365                  }
2366                goto END_OPTIONS;
2367    
2368                /* Unrecognized option character */
2369    
2370                default:
2371                *errorptr = ERR12;
2372                goto PCRE_ERROR_RETURN;
2373                }
2374            }            }
2375          else if (c == 'x')  
2376            /* If we hit a closing bracket, that's it - this is a freestanding
2377            option-setting. We need to ensure that branch_extra is updated if
2378            necessary. The only values branch_newextra can have here are 0 or 2.
2379            If the value is 2, then branch_extra must either be 2 or 5, depending
2380            on whether this is a lookbehind group or not. */
2381    
2382            END_OPTIONS:
2383            if (c == ')')
2384            {            {
2385            options |= PCRE_EXTENDED;            if (branch_newextra == 2 && (branch_extra == 0 || branch_extra == 3))
2386            length -= spaces;          /* Already counted spaces */              branch_extra += branch_newextra;
2387            continue;            continue;
2388            }            }
         else if (c == ')') break;  
2389    
2390          *errorptr = ERR12;          /* If options were terminated by ':' control comes here. Fall through
2391          goto PCRE_ERROR_RETURN;          to handle the group below. */
2392          }          }
       continue;                      /* End of this bracket handling */  
2393        }        }
2394    
2395      /* Extracting brackets must be counted so we can process escapes in a      /* Extracting brackets must be counted so we can process escapes in a
# Line 1757  while ((c = *(++ptr)) != 0) Line 2398  while ((c = *(++ptr)) != 0)
2398      else bracount++;      else bracount++;
2399    
2400      /* Non-special forms of bracket. Save length for computing whole length      /* Non-special forms of bracket. Save length for computing whole length
2401      at end if there's a repeat that requires duplication of the group. */      at end if there's a repeat that requires duplication of the group. Also
2402        save the current value of branch_extra, and start the new group with
2403        the new value. If non-zero, this will either be 2 for a (?imsx: group, or 3
2404        for a lookbehind assertion. */
2405    
2406      if (brastackptr >= sizeof(brastack)/sizeof(int))      if (brastackptr >= sizeof(brastack)/sizeof(int))
2407        {        {
# Line 1765  while ((c = *(++ptr)) != 0) Line 2409  while ((c = *(++ptr)) != 0)
2409        goto PCRE_ERROR_RETURN;        goto PCRE_ERROR_RETURN;
2410        }        }
2411    
2412        bralenstack[brastackptr] = branch_extra;
2413        branch_extra = branch_newextra;
2414    
2415      brastack[brastackptr++] = length;      brastack[brastackptr++] = length;
2416      length += 3;      length += 3;
2417      continue;      continue;
2418    
2419      /* Handle ket. Look for subsequent max/min; for certain sets of values we      /* Handle ket. Look for subsequent max/min; for certain sets of values we
2420      have to replicate this bracket up to that many times. */      have to replicate this bracket up to that many times. If brastackptr is
2421        0 this is an unmatched bracket which will generate an error, but take care
2422        not to try to access brastack[-1] when computing the length and restoring
2423        the branch_extra value. */
2424    
2425      case ')':      case ')':
2426      length += 3;      length += 3;
2427        {        {
2428        int min = 1;        int minval = 1;
2429        int max = 1;        int maxval = 1;
2430        int duplength = length - brastack[--brastackptr];        int duplength;
2431    
2432          if (brastackptr > 0)
2433            {
2434            duplength = length - brastack[--brastackptr];
2435            branch_extra = bralenstack[brastackptr];
2436            }
2437          else duplength = 0;
2438    
2439        /* Leave ptr at the final char; for read_repeat_counts this happens        /* Leave ptr at the final char; for read_repeat_counts this happens
2440        automatically; for the others we need an increment. */        automatically; for the others we need an increment. */
2441    
2442        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2, &compile_block))
2443          {          {
2444          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr,
2445              &compile_block);
2446          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2447          }          }
2448        else if (c == '*') { min = 0; max = -1; ptr++; }        else if (c == '*') { minval = 0; maxval = -1; ptr++; }
2449        else if (c == '+') { max = -1; ptr++; }        else if (c == '+') { maxval = -1; ptr++; }
2450        else if (c == '?') { min = 0; ptr++; }        else if (c == '?') { minval = 0; ptr++; }
2451    
2452        /* If there is a minimum > 1 we have to replicate up to min-1 times; if        /* If the minimum is zero, we have to allow for an OP_BRAZERO before the
2453        there is a limited maximum we have to replicate up to max-1 times and        group, and if the maximum is greater than zero, we have to replicate
2454        allow for a BRAZERO item before each optional copy, as we also have to        maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting
2455        do before the first copy if the minimum is zero. */        bracket set - hence the 7. */
2456    
2457        if (min == 0) length++;        if (minval == 0)
2458          else if (min > 1) length += (min - 1) * duplength;          {
2459        if (max > min) length += (max - min) * (duplength + 1);          length++;
2460        }          if (maxval > 0) length += (maxval - 1) * (duplength + 7);
2461            }
2462    
2463          /* When the minimum is greater than zero, we have to replicate up to
2464          minval-1 times, with no additions required in the copies. Then, if
2465          there is a limited maximum we have to replicate up to maxval-1 times
2466          allowing for a BRAZERO item before each optional copy and nesting
2467          brackets for all but one of the optional copies. */
2468    
2469          else
2470            {
2471            length += (minval - 1) * duplength;
2472            if (maxval > minval)   /* Need this test as maxval=-1 means no limit */
2473              length += (maxval - minval) * (duplength + 7) - 6;
2474            }
2475          }
2476      continue;      continue;
2477    
2478      /* Non-special character. For a run of such characters the length required      /* Non-special character. For a run of such characters the length required
# Line 1814  while ((c = *(++ptr)) != 0) Line 2486  while ((c = *(++ptr)) != 0)
2486      runlength = 0;      runlength = 0;
2487      do      do
2488        {        {
2489        if ((pcre_ctypes[c] & ctype_space) != 0)        if ((options & PCRE_EXTENDED) != 0)
         {  
         if ((options & PCRE_EXTENDED) != 0) continue;  
         spaces++;  
         }  
   
       if (c == '#' && (options & PCRE_EXTENDED) != 0)  
2490          {          {
2491          while ((c = *(++ptr)) != 0 && c != '\n');          if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2492          continue;          if (c == '#')
2493              {
2494              while ((c = *(++ptr)) != 0 && c != '\n');
2495              continue;
2496              }
2497          }          }
2498    
2499        /* Backslash may introduce a data char or a metacharacter; stop the        /* Backslash may introduce a data char or a metacharacter; stop the
# Line 1831  while ((c = *(++ptr)) != 0) Line 2501  while ((c = *(++ptr)) != 0)
2501    
2502        if (c == '\\')        if (c == '\\')
2503          {          {
2504          uschar *saveptr = ptr;          const uschar *saveptr = ptr;
2505          c = check_escape(&ptr, errorptr, bracount, options, FALSE);          c = check_escape(&ptr, errorptr, bracount, options, FALSE,
2506              &compile_block);
2507          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2508          if (c < 0) { ptr = saveptr; break; }          if (c < 0) { ptr = saveptr; break; }
2509          }          }
# Line 1844  while ((c = *(++ptr)) != 0) Line 2515  while ((c = *(++ptr)) != 0)
2515    
2516      /* This "while" is the end of the "do" above. */      /* This "while" is the end of the "do" above. */
2517    
2518      while (runlength < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);      while (runlength < 255 &&
2519          (compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0);
2520    
2521      ptr--;      ptr--;
2522      length += runlength;      length += runlength;
# Line 1861  if (length > 65539) Line 2533  if (length > 65539)
2533    }    }
2534    
2535  /* Compute the size of data block needed and get it, either from malloc or  /* Compute the size of data block needed and get it, either from malloc or
2536  externally provided function. Put in the magic number and the options. */  externally provided function. We specify "code[0]" in the offsetof() expression
2537    rather than just "code", because it has been reported that one broken compiler
2538    fails on "code" because it is also an independent variable. It should make no
2539    difference to the value of the offsetof(). */
2540    
2541  size = length + offsetof(real_pcre, code);  size = length + offsetof(real_pcre, code[0]);
2542  re = (real_pcre *)(pcre_malloc)(size);  re = (real_pcre *)(pcre_malloc)(size);
2543    
2544  if (re == NULL)  if (re == NULL)
# Line 1872  if (re == NULL) Line 2547  if (re == NULL)
2547    return NULL;    return NULL;
2548    }    }
2549    
2550    /* Put in the magic number and the options. */
2551    
2552  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
2553  re->options = options;  re->options = options;
2554    re->tables = tables;
2555    
2556  /* Set up a starting, non-extracting bracket, then compile the expression. On  /* Set up a starting, non-extracting bracket, then compile the expression. On
2557  error, *errorptr will be set non-NULL, so we don't need to look at the result  error, *errorptr will be set non-NULL, so we don't need to look at the result
2558  of the function here. */  of the function here. */
2559    
2560  ptr = (uschar *)pattern;  ptr = (const uschar *)pattern;
2561  code = re->code;  code = re->code;
2562  *code = OP_BRA;  *code = OP_BRA;
2563  bracount = 0;  bracount = 0;
2564  (void)compile_regex(options, &bracount, &code, &ptr, errorptr);  (void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1,
2565      &reqchar, &countlits, &compile_block);
2566  re->top_bracket = bracount;  re->top_bracket = bracount;
2567  re->top_backref = top_backref;  re->top_backref = top_backref;
2568    
# Line 1900  if debugging, leave the test till after Line 2579  if debugging, leave the test till after
2579  if (code - re->code > length) *errorptr = ERR23;  if (code - re->code > length) *errorptr = ERR23;
2580  #endif  #endif
2581    
2582    /* Give an error if there's a back reference to a non-existent capturing
2583    subpattern. */
2584    
2585    if (top_backref > re->top_bracket) *errorptr = ERR15;
2586    
2587  /* Failed to compile */  /* Failed to compile */
2588    
2589  if (*errorptr != NULL)  if (*errorptr != NULL)
2590    {    {
2591    (pcre_free)(re);    (pcre_free)(re);
2592    PCRE_ERROR_RETURN:    PCRE_ERROR_RETURN:
2593    *erroroffset = ptr - (uschar *)pattern;    *erroroffset = ptr - (const uschar *)pattern;
2594    return NULL;    return NULL;
2595    }    }
2596    
2597  /* If the anchored option was not passed, set flag if we can determine that it  /* If the anchored option was not passed, set flag if we can determine that the
2598  is anchored by virtue of ^ characters or \A or anything else. Otherwise, see if  pattern is anchored by virtue of ^ characters or \A or anything else (such as
2599  we can determine what the first character has to be, because that speeds up  starting with .* when DOTALL is set).
2600  unanchored matches no end. In the case of multiline matches, an alternative is  
2601  to set the PCRE_STARTLINE flag if all branches start with ^. */  Otherwise, see if we can determine what the first character has to be, because
2602    that speeds up unanchored matches no end. If not, see if we can set the
2603    PCRE_STARTLINE flag. This is helpful for multiline matches when all branches
2604    start with ^, and also when all branches start with .* for non-DOTALL matches.
2605    */
2606    
2607  if ((options & PCRE_ANCHORED) == 0)  if ((options & PCRE_ANCHORED) == 0)
2608    {    {
2609    if (is_anchored(re->code, (options & PCRE_MULTILINE) != 0))    int temp_options = options;
2610      if (is_anchored(re->code, &temp_options))
2611      re->options |= PCRE_ANCHORED;      re->options |= PCRE_ANCHORED;
2612    else    else
2613      {      {
2614      int c = find_firstchar(re->code);      int ch = find_firstchar(re->code, &temp_options);
2615      if (c >= 0)      if (ch >= 0)
2616        {        {
2617        re->first_char = c;        re->first_char = ch;
2618        re->options |= PCRE_FIRSTSET;        re->options |= PCRE_FIRSTSET;
2619        }        }
2620      else if (is_startline(re->code))      else if (is_startline(re->code))
# Line 1933  if ((options & PCRE_ANCHORED) == 0) Line 2622  if ((options & PCRE_ANCHORED) == 0)
2622      }      }
2623    }    }
2624    
2625    /* Save the last required character if there are at least two literal
2626    characters on all paths, or if there is no first character setting. */
2627    
2628    if (reqchar >= 0 && (countlits > 1 || (re->options & PCRE_FIRSTSET) == 0))
2629      {
2630      re->req_char = reqchar;
2631      re->options |= PCRE_REQCHSET;
2632      }
2633    
2634  /* Print out the compiled data for debugging */  /* Print out the compiled data for debugging */
2635    
2636  #ifdef DEBUG  #ifdef DEBUG
2637    
2638  printf("Length = %d top_bracket = %d top_backref=%d\n",  printf("Length = %d top_bracket = %d top_backref = %d\n",
2639    length, re->top_bracket, re->top_backref);    length, re->top_bracket, re->top_backref);
2640    
2641  if (re->options != 0)  if (re->options != 0)
2642    {    {
2643    printf("%s%s%s%s%s%s%s\n",    printf("%s%s%s%s%s%s%s%s%s\n",
2644      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
2645      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
2646        ((re->options & PCRE_ICHANGED) != 0)? "case state changed " : "",
2647      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
2648      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
2649      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
2650      ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",      ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
2651      ((re->options & PCRE_EXTRA) != 0)? "extra " : "");      ((re->options & PCRE_EXTRA) != 0)? "extra " : "",
2652        ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");
2653    }    }
2654    
2655  if ((re->options & PCRE_FIRSTSET) != 0)  if ((re->options & PCRE_FIRSTSET) != 0)
# Line 1958  if ((re->options & PCRE_FIRSTSET) != 0) Line 2658  if ((re->options & PCRE_FIRSTSET) != 0)
2658      else printf("First char = \\x%02x\n", re->first_char);      else printf("First char = \\x%02x\n", re->first_char);
2659    }    }
2660    
2661    if ((re->options & PCRE_REQCHSET) != 0)
2662      {
2663      if (isprint(re->req_char)) printf("Req char = %c\n", re->req_char);
2664        else printf("Req char = \\x%02x\n", re->req_char);
2665      }
2666    
2667  code_end = code;  code_end = code;
2668  code_base = code = re->code;  code_base = code = re->code;
2669    
# Line 1975  while (code < code_end) Line 2681  while (code < code_end)
2681    
2682    else switch(*code)    else switch(*code)
2683      {      {
2684        case OP_OPT:
2685        printf(" %.2x %s", code[1], OP_names[*code]);
2686        code++;
2687        break;
2688    
2689        case OP_COND:
2690        printf("%3d Cond", (code[1] << 8) + code[2]);
2691        code += 2;
2692        break;
2693    
2694        case OP_CREF:
2695        printf(" %.2d %s", code[1], OP_names[*code]);
2696        code++;
2697        break;
2698    
2699      case OP_CHARS:      case OP_CHARS:
2700      charlength = *(++code);      charlength = *(++code);
2701      printf("%3d ", charlength);      printf("%3d ", charlength);
# Line 1988  while (code < code_end) Line 2709  while (code < code_end)
2709      case OP_KET:      case OP_KET:
2710      case OP_ASSERT:      case OP_ASSERT:
2711      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
2712        case OP_ASSERTBACK:
2713        case OP_ASSERTBACK_NOT:
2714      case OP_ONCE:      case OP_ONCE:
2715      printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);      printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
2716      code += 2;      code += 2;
2717      break;      break;
2718    
2719        case OP_REVERSE:
2720        printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
2721        code += 2;
2722        break;
2723    
2724      case OP_STAR:      case OP_STAR:
2725      case OP_MINSTAR:      case OP_MINSTAR:
2726      case OP_PLUS:      case OP_PLUS:
# Line 2017  while (code < code_end) Line 2745  while (code < code_end)
2745      case OP_MINUPTO:      case OP_MINUPTO:
2746      if (isprint(c = code[3])) printf("    %c{", c);      if (isprint(c = code[3])) printf("    %c{", c);
2747        else printf("    \\x%02x{", c);        else printf("    \\x%02x{", c);
2748      if (*code != OP_EXACT) printf(",");      if (*code != OP_EXACT) printf("0,");
2749      printf("%d}", (code[1] << 8) + code[2]);      printf("%d}", (code[1] << 8) + code[2]);
2750      if (*code == OP_MINUPTO) printf("?");      if (*code == OP_MINUPTO) printf("?");
2751      code += 3;      code += 3;
# Line 2062  while (code < code_end) Line 2790  while (code < code_end)
2790    
2791      case OP_REF:      case OP_REF:
2792      printf("    \\%d", *(++code));      printf("    \\%d", *(++code));
2793      break;      code ++;
2794        goto CLASS_REF_REPEAT;
2795    
2796      case OP_CLASS:      case OP_CLASS:
2797        {        {
2798        int i, min, max;        int i, min, max;
   
2799        code++;        code++;
2800        printf("    [");        printf("    [");
2801    
# Line 2092  while (code < code_end) Line 2820  while (code < code_end)
2820        printf("]");        printf("]");
2821        code += 32;        code += 32;
2822    
2823          CLASS_REF_REPEAT:
2824    
2825        switch(*code)        switch(*code)
2826          {          {
2827          case OP_CRSTAR:          case OP_CRSTAR:
# Line 2149  return (pcre *)re; Line 2879  return (pcre *)re;
2879    
2880    
2881  /*************************************************  /*************************************************
 *        Match a character type                  *  
 *************************************************/  
   
 /* Not used in all the places it might be as it's sometimes faster  
 to put the code inline.  
   
 Arguments:  
   type        the character type  
   c           the character  
   dotall      the dotall flag  
   
 Returns:      TRUE if character is of the type  
 */  
   
 static BOOL  
 match_type(int type, int c, BOOL dotall)  
 {  
   
 #ifdef DEBUG  
 if (isprint(c)) printf("matching subject %c against ", c);  
   else printf("matching subject \\x%02x against ", c);  
 printf("%s\n", OP_names[type]);  
 #endif  
   
 switch(type)  
   {  
   case OP_ANY:            return dotall || c != '\n';  
   case OP_NOT_DIGIT:      return (pcre_ctypes[c] & ctype_digit) == 0;  
   case OP_DIGIT:          return (pcre_ctypes[c] & ctype_digit) != 0;  
   case OP_NOT_WHITESPACE: return (pcre_ctypes[c] & ctype_space) == 0;  
   case OP_WHITESPACE:     return (pcre_ctypes[c] & ctype_space) != 0;  
   case OP_NOT_WORDCHAR:   return (pcre_ctypes[c] & ctype_word) == 0;  
   case OP_WORDCHAR:       return (pcre_ctypes[c] & ctype_word) != 0;  
   }  
 return FALSE;  
 }  
   
   
   
 /*************************************************  
2882  *          Match a back-reference                *  *          Match a back-reference                *
2883  *************************************************/  *************************************************/
2884    
2885  /* If a back reference hasn't been set, the match fails.  /* If a back reference hasn't been set, the length that is passed is greater
2886    than the number of characters left in the string, so the match fails.
2887    
2888  Arguments:  Arguments:
2889    number      reference number    offset      index into the offset vector
2890    eptr        points into the subject    eptr        points into the subject
2891    length      length to be matched    length      length to be matched
2892    md          points to match data block    md          points to match data block
2893      ims         the ims flags
2894    
2895  Returns:      TRUE if matched  Returns:      TRUE if matched
2896  */  */
2897    
2898  static BOOL  static BOOL
2899  match_ref(int number, register uschar *eptr, int length, match_data *md)  match_ref(int offset, register const uschar *eptr, int length, match_data *md,
2900      unsigned long int ims)
2901  {  {
2902  uschar *p = md->start_subject + md->offset_vector[number];  const uschar *p = md->start_subject + md->offset_vector[offset];
2903    
2904  #ifdef DEBUG  #ifdef DEBUG
2905  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 2223  printf("\n"); Line 2916  printf("\n");
2916    
2917  /* Always fail if not enough characters left */  /* Always fail if not enough characters left */
2918    
2919  if (length > md->end_subject - p) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
2920    
2921  /* Separate the caseless case for speed */  /* Separate the caseless case for speed */
2922    
2923  if (md->caseless)  if ((ims & PCRE_CASELESS) != 0)
2924    { while (length-- > 0) if (pcre_lcc[*p++] != pcre_lcc[*eptr++]) return FALSE; }    {
2925      while (length-- > 0)
2926        if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
2927      }
2928  else  else
2929    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
2930    
# Line 2241  return TRUE; Line 2937  return TRUE;
2937  *         Match from current position            *  *         Match from current position            *
2938  *************************************************/  *************************************************/
2939    
2940  /* On entry ecode points to the first opcode, and eptr to the first character.  /* On entry ecode points to the first opcode, and eptr to the first character
2941    in the subject string, while eptrb holds the value of eptr at the start of the
2942    last bracketed group - used for breaking infinite loops matching zero-length
2943    strings.
2944    
2945  Arguments:  Arguments:
2946     eptr        pointer in subject     eptr        pointer in subject
2947     ecode       position in code     ecode       position in code
2948     offset_top  current top pointer     offset_top  current top pointer
2949     md          pointer to "static" info for the match     md          pointer to "static" info for the match
2950       ims         current /i, /m, and /s options
2951       condassert  TRUE if called to check a condition assertion
2952       eptrb       eptr at start of last bracket
2953    
2954  Returns:       TRUE if matched  Returns:       TRUE if matched
2955  */  */
2956    
2957  static BOOL  static BOOL
2958  match(register uschar *eptr, register uschar *ecode, int offset_top,  match(register const uschar *eptr, register const uschar *ecode,
2959    match_data *md)    int offset_top, match_data *md, unsigned long int ims, BOOL condassert,
2960      const uschar *eptrb)
2961  {  {
2962    unsigned long int original_ims = ims;   /* Save for resetting on ')' */
2963    
2964  for (;;)  for (;;)
2965    {    {
2966      int op = (int)*ecode;
2967    int min, max, ctype;    int min, max, ctype;
2968    register int i;    register int i;
2969    register int c;    register int c;
2970    BOOL minimize;    BOOL minimize = FALSE;
2971    
2972    /* Opening bracket. Check the alternative branches in turn, failing if none    /* Opening capturing bracket. If there is space in the offset vector, save
2973    match. We have to set the start offset if required and there is space    the current subject position in the working slot at the top of the vector. We
2974    in the offset vector so that it is available for subsequent back references    mustn't change the current values of the data slot, because they may be set
2975    if the bracket matches. However, if the bracket fails, we must put back the    from a previous iteration of this group, and be referred to by a reference
2976    previous value of both offsets in case they were set by a previous copy of    inside the group.
2977    the same bracket. Don't worry about setting the flag for the error case here;  
2978    that is handled in the code for KET. */    If the bracket fails to match, we need to restore this value and also the
2979      values of the final offsets, in case they were set by a previous iteration of
2980      the same bracket.
2981    
2982      If there isn't enough space in the offset vector, treat this as if it were a
2983      non-capturing bracket. Don't worry about setting the flag for the error case
2984      here; that is handled in the code for KET. */
2985    
2986    if ((int)*ecode >= OP_BRA)    if (op > OP_BRA)
2987      {      {
2988      int number = (*ecode - OP_BRA) << 1;      int number = op - OP_BRA;
2989      int save_offset1, save_offset2;      int offset = number << 1;
2990    
2991      #ifdef DEBUG  #ifdef DEBUG
2992      printf("start bracket %d\n", number/2);      printf("start bracket %d subject=", number);
2993      #endif      pchars(eptr, 16, TRUE, md);
2994        printf("\n");
2995    #endif
2996    
2997      if (number > 0 && number < md->offset_end)      if (offset < md->offset_max)
2998        {        {
2999        save_offset1 = md->offset_vector[number];        int save_offset1 = md->offset_vector[offset];
3000        save_offset2 = md->offset_vector[number+1];        int save_offset2 = md->offset_vector[offset+1];
3001        md->offset_vector[number] = eptr - md->start_subject;        int save_offset3 = md->offset_vector[md->offset_end - number];
3002    
3003          DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
3004          md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
3005    
3006          do
3007            {
3008            if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3009            ecode += (ecode[1] << 8) + ecode[2];
3010            }
3011          while (*ecode == OP_ALT);
3012    
3013        #ifdef DEBUG        DPRINTF(("bracket %d failed\n", number));
3014        printf("saving %d %d\n", save_offset1, save_offset2);  
3015        #endif        md->offset_vector[offset] = save_offset1;
3016          md->offset_vector[offset+1] = save_offset2;
3017          md->offset_vector[md->offset_end - number] = save_offset3;
3018          return FALSE;
3019        }        }
3020    
3021      /* Recurse for all the alternatives. */      /* Insufficient room for saving captured contents */
3022    
3023        else op = OP_BRA;
3024        }
3025    
3026      /* Other types of node can be handled by a switch */
3027    
3028      switch(op)
3029        {
3030        case OP_BRA:     /* Non-capturing bracket: optimized */
3031        DPRINTF(("start bracket 0\n"));
3032      do      do
3033        {        {
3034        if (match(eptr, ecode+3, offset_top, md)) return TRUE;        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3035        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
3036        }        }
3037      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
3038        DPRINTF(("bracket 0 failed\n"));
3039        return FALSE;
3040    
3041        /* Conditional group: compilation checked that there are no more than
3042        two branches. If the condition is false, skipping the first branch takes us
3043        past the end if there is only one branch, but that's OK because that is
3044        exactly what going to the ket would do. */
3045    
3046        case OP_COND:
3047        if (ecode[3] == OP_CREF)         /* Condition is extraction test */
3048          {
3049          int offset = ecode[4] << 1;    /* Doubled reference number */
3050          return match(eptr,
3051            ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)?
3052              5 : 3 + (ecode[1] << 8) + ecode[2]),
3053            offset_top, md, ims, FALSE, eptr);
3054          }
3055    
3056      #ifdef DEBUG      /* The condition is an assertion. Call match() to evaluate it - setting
3057      printf("bracket %d failed\n", number/2);      the final argument TRUE causes it to stop at the end of an assertion. */
     #endif  
3058    
3059      if (number > 0 && number < md->offset_end)      else
3060        {        {
3061        md->offset_vector[number] = save_offset1;        if (match(eptr, ecode+3, offset_top, md, ims, TRUE, NULL))
3062        md->offset_vector[number+1] = save_offset2;          {
3063            ecode += 3 + (ecode[4] << 8) + ecode[5];
3064            while (*ecode == OP_ALT) ecode += (ecode[1] << 8) + ecode[2];
3065            }
3066          else ecode += (ecode[1] << 8) + ecode[2];
3067          return match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr);
3068        }        }
3069        /* Control never reaches here */
3070    
3071      return FALSE;      /* Skip over conditional reference data if encountered (should not be) */
     }  
3072    
3073    /* Other types of node can be handled by a switch */      case OP_CREF:
3074        ecode += 2;
3075        break;
3076    
3077        /* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched
3078        an empty string - recursion will then try other alternatives, if any. */
3079    
   switch(*ecode)  
     {  
3080      case OP_END:      case OP_END:
3081        if (md->notempty && eptr == md->start_match) return FALSE;
3082      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;          /* Record where we ended */
3083      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;   /* and how many extracts were taken */
3084      return TRUE;      return TRUE;
3085    
3086      /* The equivalent of Prolog's "cut" - if the rest doesn't match, the      /* Change option settings */
     whole thing doesn't match, so we have to get out via a longjmp(). */  
3087    
3088      case OP_CUT:      case OP_OPT:
3089      if (match(eptr, ecode+1, offset_top, md)) return TRUE;      ims = ecode[1];
3090      longjmp(md->fail_env, 1);      ecode += 2;
3091        DPRINTF(("ims set to %02x\n", ims));
3092        break;
3093    
3094      /* Assertion brackets. Check the alternative branches in turn - the      /* Assertion brackets. Check the alternative branches in turn - the
3095      matching won't pass the KET for an assertion. If any one branch matches,      matching won't pass the KET for an assertion. If any one branch matches,
3096      the assertion is true. */      the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
3097        start of each branch to move the current point backwards, so the code at
3098        this level is identical to the lookahead case. */
3099    
3100      case OP_ASSERT:      case OP_ASSERT:
3101        case OP_ASSERTBACK:
3102      do      do
3103        {        {
3104        if (match(eptr, ecode+3, offset_top, md)) break;        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, NULL)) break;
3105        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
3106        }        }
3107      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
3108      if (*ecode == OP_KET) return FALSE;      if (*ecode == OP_KET) return FALSE;
3109    
3110        /* If checking an assertion for a condition, return TRUE. */
3111    
3112        if (condassert) return TRUE;
3113    
3114      /* Continue from after the assertion, updating the offsets high water      /* Continue from after the assertion, updating the offsets high water
3115      mark, since extracts may have been taken during the assertion. */      mark, since extracts may have been taken during the assertion. */
3116    
# Line 2353  for (;;) Line 3122  for (;;)
3122      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match */
3123    
3124      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
3125        case OP_ASSERTBACK_NOT:
3126      do      do
3127        {        {
3128        if (match(eptr, ecode+3, offset_top, md)) return FALSE;        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, NULL)) return FALSE;
3129        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
3130        }        }
3131      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
3132    
3133        if (condassert) return TRUE;
3134      ecode += 3;      ecode += 3;
3135      continue;      continue;
3136    
3137        /* Move the subject pointer back. This occurs only at the start of
3138        each branch of a lookbehind assertion. If we are too close to the start to
3139        move back, this match function fails. */
3140    
3141        case OP_REVERSE:
3142        eptr -= (ecode[1] << 8) + ecode[2];
3143        if (eptr < md->start_subject) return FALSE;
3144        ecode += 3;
3145        break;
3146    
3147    
3148      /* "Once" brackets are like assertion brackets except that after a match,      /* "Once" brackets are like assertion brackets except that after a match,
3149      the point in the subject string is not moved back. Thus there can never be      the point in the subject string is not moved back. Thus there can never be
3150      a move back into the brackets. Check the alternative branches in turn - the      a move back into the brackets. Check the alternative branches in turn - the
3151      matching won't pass the KET for this kind of subpattern. If any one branch      matching won't pass the KET for this kind of subpattern. If any one branch
3152      matches, we carry on, leaving the subject pointer. */      matches, we carry on as at the end of a normal bracket, leaving the subject
3153        pointer. */
3154    
3155      case OP_ONCE:      case OP_ONCE:
     do  
3156        {        {
3157        if (match(eptr, ecode+3, offset_top, md)) break;        const uschar *prev = ecode;
3158        ecode += (ecode[1] << 8) + ecode[2];  
3159        }        do
3160      while (*ecode == OP_ALT);          {
3161      if (*ecode == OP_KET) return FALSE;          if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) break;
3162            ecode += (ecode[1] << 8) + ecode[2];
3163            }
3164          while (*ecode == OP_ALT);
3165    
3166      /* Continue as from after the assertion, updating the offsets high water        /* If hit the end of the group (which could be repeated), fail */
     mark, since extracts may have been taken. */  
3167    
3168      do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);        if (*ecode != OP_ONCE && *ecode != OP_ALT) return FALSE;
3169      ecode += 3;  
3170      offset_top = md->end_offset_top;        /* Continue as from after the assertion, updating the offsets high water
3171      eptr = md->end_match_ptr;        mark, since extracts may have been taken. */
3172      continue;  
3173          do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
3174    
3175          offset_top = md->end_offset_top;
3176          eptr = md->end_match_ptr;
3177    
3178          /* For a non-repeating ket, just continue at this level. This also
3179          happens for a repeating ket if no characters were matched in the group.
3180          This is the forcible breaking of infinite loops as implemented in Perl
3181          5.005. If there is an options reset, it will get obeyed in the normal
3182          course of events. */
3183    
3184          if (*ecode == OP_KET || eptr == eptrb)
3185            {
3186            ecode += 3;
3187            break;
3188            }
3189    
3190          /* The repeating kets try the rest of the pattern or restart from the
3191          preceding bracket, in the appropriate order. We need to reset any options
3192          that changed within the bracket before re-running it, so check the next
3193          opcode. */
3194    
3195          if (ecode[3] == OP_OPT)
3196            {
3197            ims = (ims & ~PCRE_IMS) | ecode[4];
3198            DPRINTF(("ims set to %02x at group repeat\n", ims));
3199            }
3200    
3201          if (*ecode == OP_KETRMIN)
3202            {
3203            if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr) ||
3204                match(eptr, prev, offset_top, md, ims, FALSE, eptr)) return TRUE;
3205            }
3206          else  /* OP_KETRMAX */
3207            {
3208            if (match(eptr, prev, offset_top, md, ims, FALSE, eptr) ||
3209                match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3210            }
3211          }
3212        return FALSE;
3213    
3214      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
3215      bracketed group and go to there. */      bracketed group and go to there. */
# Line 2401  for (;;) Line 3226  for (;;)
3226    
3227      case OP_BRAZERO:      case OP_BRAZERO:
3228        {        {
3229        uschar *next = ecode+1;        const uschar *next = ecode+1;
3230        if (match(eptr, next, offset_top, md)) return TRUE;        if (match(eptr, next, offset_top, md, ims, FALSE, eptr)) return TRUE;
3231        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3232        ecode = next + 3;        ecode = next + 3;
3233        }        }
# Line 2410  for (;;) Line 3235  for (;;)
3235    
3236      case OP_BRAMINZERO:      case OP_BRAMINZERO:
3237        {        {
3238        uschar *next = ecode+1;        const uschar *next = ecode+1;
3239        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3240        if (match(eptr, next+3, offset_top, md)) return TRUE;        if (match(eptr, next+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3241        ecode++;        ecode++;
3242        }        }
3243      break;;      break;
3244    
3245      /* End of a group, repeated or non-repeating. If we are at the end of      /* End of a group, repeated or non-repeating. If we are at the end of
3246      an assertion "group", stop matching and return TRUE, but record the      an assertion "group", stop matching and return TRUE, but record the
3247      current high water mark for use by positive assertions. */      current high water mark for use by positive assertions. Do this also
3248        for the "once" (not backing up) groups. */
3249    
3250      case OP_KET:      case OP_KET:
3251      case OP_KETRMIN:      case OP_KETRMIN:
3252      case OP_KETRMAX:      case OP_KETRMAX:
3253        {        {
3254        int number;        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
       uschar *prev = ecode - (ecode[1] << 8) - ecode[2];  
3255    
3256        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
3257              *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
3258              *prev == OP_ONCE)
3259          {          {
3260          md->end_match_ptr = eptr;      /* For ONCE */          md->end_match_ptr = eptr;      /* For ONCE */
3261          md->end_offset_top = offset_top;          md->end_offset_top = offset_top;
3262          return TRUE;          return TRUE;
3263          }          }
3264    
3265        /* In all other cases we have to check the group number back at the        /* In all other cases except a conditional group we have to check the
3266        start and if necessary complete handling an extraction by setting the        group number back at the start and if necessary complete handling an
3267        final offset and bumping the high water mark. */        extraction by setting the offsets and bumping the high water mark. */
3268    
3269        number = (*prev - OP_BRA) << 1;        if (*prev != OP_COND)
3270            {
3271            int number = *prev - OP_BRA;
3272            int offset = number << 1;
3273    
3274        #ifdef DEBUG          DPRINTF(("end bracket %d\n", number));
       printf("end bracket %d\n", number/2);  
       #endif  
3275    
3276        if (number > 0)          if (number > 0)
         {  
         if (number >= md->offset_end) md->offset_overflow = TRUE; else  
3277            {            {
3278            md->offset_vector[number+1] = eptr - md->start_subject;            if (offset >= md->offset_max) md->offset_overflow = TRUE; else
3279            if (offset_top <= number) offset_top = number + 2;              {
3280                md->offset_vector[offset] =
3281                  md->offset_vector[md->offset_end - number];
3282                md->offset_vector[offset+1] = eptr - md->start_subject;
3283                if (offset_top <= offset) offset_top = offset + 2;
3284                }
3285            }            }
3286          }          }
3287    
3288        /* For a non-repeating ket, just advance to the next node and continue at        /* Reset the value of the ims flags, in case they got changed during
3289        this level. */        the group. */
3290    
3291          ims = original_ims;
3292          DPRINTF(("ims reset to %02x\n", ims));
3293    
3294        if (*ecode == OP_KET)        /* For a non-repeating ket, just continue at this level. This also
3295          happens for a repeating ket if no characters were matched in the group.
3296          This is the forcible breaking of infinite loops as implemented in Perl
3297          5.005. If there is an options reset, it will get obeyed in the normal
3298          course of events. */
3299    
3300          if (*ecode == OP_KET || eptr == eptrb)
3301          {          {
3302          ecode += 3;          ecode += 3;
3303          break;          break;
# Line 2468  for (;;) Line 3308  for (;;)
3308    
3309        if (*ecode == OP_KETRMIN)        if (*ecode == OP_KETRMIN)
3310          {          {
3311          if (match(eptr, ecode+3, offset_top, md) ||          if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr) ||
3312              match(eptr, prev, offset_top, md)) return TRUE;              match(eptr, prev, offset_top, md, ims, FALSE, eptr)) return TRUE;
3313          }          }
3314        else  /* OP_KETRMAX */        else  /* OP_KETRMAX */
3315          {          {
3316          if (match(eptr, prev, offset_top, md) ||          if (match(eptr, prev, offset_top, md, ims, FALSE, eptr) ||
3317              match(eptr, ecode+3, offset_top, md)) return TRUE;              match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3318          }          }
3319        }        }
3320      return FALSE;      return FALSE;
# Line 2483  for (;;) Line 3323  for (;;)
3323    
3324      case OP_CIRC:      case OP_CIRC:
3325      if (md->notbol && eptr == md->start_subject) return FALSE;      if (md->notbol && eptr == md->start_subject) return FALSE;
3326      if (md->multiline)      if ((ims & PCRE_MULTILINE) != 0)
3327        {        {
3328        if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;        if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;
3329        ecode++;        ecode++;
# Line 2498  for (;;) Line 3338  for (;;)
3338      ecode++;      ecode++;
3339      break;      break;
3340    
3341      /* Assert before internal newline if multiline, or before      /* Assert before internal newline if multiline, or before a terminating
3342      a terminating newline unless endonly is set, else end of subject unless      newline unless endonly is set, else end of subject unless noteol is set. */
     noteol is set. */  
3343    
3344      case OP_DOLL:      case OP_DOLL:
3345      if (md->noteol && eptr >= md->end_subject) return FALSE;      if ((ims & PCRE_MULTILINE) != 0)
     if (md->multiline)  
3346        {        {
3347        if (eptr < md->end_subject && *eptr != '\n') return FALSE;        if (eptr < md->end_subject) { if (*eptr != '\n') return FALSE; }
3348            else { if (md->noteol) return FALSE; }
3349        ecode++;        ecode++;
3350        break;        break;
3351        }        }
3352      else if (!md->endonly)      else
3353        {        {
3354        if (eptr < md->end_subject - 1 ||        if (md->noteol) return FALSE;
3355           (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;        if (!md->endonly)
3356        ecode++;          {
3357        break;          if (eptr < md->end_subject - 1 ||
3358               (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
3359    
3360            ecode++;
3361            break;
3362            }
3363        }        }
3364      /* ... else fall through */      /* ... else fall through */
3365    
3366      /* End of subject assertion */      /* End of subject assertion (\z) */
3367    
3368      case OP_EOD:      case OP_EOD:
3369      if (eptr < md->end_subject) return FALSE;      if (eptr < md->end_subject) return FALSE;
3370      ecode++;      ecode++;
3371      break;      break;
3372    
3373        /* End of subject or ending \n assertion (\Z) */
3374    
3375        case OP_EODN:
3376        if (eptr < md->end_subject - 1 ||
3377           (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
3378        ecode++;
3379        break;
3380    
3381      /* Word boundary assertions */      /* Word boundary assertions */
3382    
3383      case OP_NOT_WORD_BOUNDARY:      case OP_NOT_WORD_BOUNDARY:
3384      case OP_WORD_BOUNDARY:      case OP_WORD_BOUNDARY:
3385        {        {
3386        BOOL prev_is_word = (eptr != md->start_subject) &&        BOOL prev_is_word = (eptr != md->start_subject) &&
3387          ((pcre_ctypes[eptr[-1]] & ctype_word) != 0);          ((md->ctypes[eptr[-1]] & ctype_word) != 0);
3388        BOOL cur_is_word = (eptr < md->end_subject) &&        BOOL cur_is_word = (eptr < md->end_subject) &&
3389          ((pcre_ctypes[*eptr] & ctype_word) != 0);          ((md->ctypes[*eptr] & ctype_word) != 0);
3390        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
3391             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
3392          return FALSE;          return FALSE;
# Line 2544  for (;;) Line 3396  for (;;)
3396      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
3397    
3398      case OP_ANY:      case OP_ANY:
3399      if (!md->dotall && eptr < md->end_subject && *eptr == '\n') return FALSE;      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n')
3400          return FALSE;
3401      if (eptr++ >= md->end_subject) return FALSE;      if (eptr++ >= md->end_subject) return FALSE;
3402      ecode++;      ecode++;
3403      break;      break;
3404    
3405      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
3406      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) != 0)      if (eptr >= md->end_subject ||
3407           (md->ctypes[*eptr++] & ctype_digit) != 0)
3408        return FALSE;        return FALSE;
3409      ecode++;      ecode++;
3410      break;      break;
3411    
3412      case OP_DIGIT:      case OP_DIGIT:
3413      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) == 0)      if (eptr >= md->end_subject ||
3414           (md->ctypes[*eptr++] & ctype_digit) == 0)
3415        return FALSE;        return FALSE;
3416      ecode++;      ecode++;
3417      break;      break;
3418    
3419      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
3420      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) != 0)      if (eptr >= md->end_subject ||
3421           (md->ctypes[*eptr++] & ctype_space) != 0)
3422        return FALSE;        return FALSE;
3423      ecode++;      ecode++;
3424      break;      break;
3425    
3426      case OP_WHITESPACE:      case OP_WHITESPACE:
3427      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) == 0)      if (eptr >= md->end_subject ||
3428           (md->ctypes[*eptr++] & ctype_space) == 0)
3429        return FALSE;        return FALSE;
3430      ecode++;      ecode++;
3431      break;      break;
3432    
3433      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
3434      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) != 0)      if (eptr >= md->end_subject ||
3435           (md->ctypes[*eptr++] & ctype_word) != 0)
3436        return FALSE;        return FALSE;
3437      ecode++;      ecode++;
3438      break;      break;
3439    
3440      case OP_WORDCHAR:      case OP_WORDCHAR:
3441      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) == 0)      if (eptr >= md->end_subject ||
3442           (md->ctypes[*eptr++] & ctype_word) == 0)
3443        return FALSE;        return FALSE;
3444      ecode++;      ecode++;
3445      break;      break;
# Line 2596  for (;;) Line 3455  for (;;)
3455      case OP_REF:      case OP_REF:
3456        {        {
3457        int length;        int length;
3458        int number = ecode[1] << 1;                /* Doubled reference number */        int offset = ecode[1] << 1;                /* Doubled reference number */
3459        ecode += 2;                                /* Advance past the item */        ecode += 2;                                /* Advance past the item */
3460    
3461        if (number >= offset_top || md->offset_vector[number] < 0)        /* If the reference is unset, set the length to be longer than the amount
3462          {        of subject left; this ensures that every attempt at a match fails. We
3463          md->errorcode = PCRE_ERROR_BADREF;        can't just fail here, because of the possibility of quantifiers with zero
3464          return FALSE;        minima. */
3465          }  
3466          length = (offset >= offset_top || md->offset_vector[offset] < 0)?
3467            md->end_subject - eptr + 1 :
3468            md->offset_vector[offset+1] - md->offset_vector[offset];
3469    
3470        length = md->offset_vector[number+1] - md->offset_vector[number];        /* Set up for repetition, or handle the non-repeated case */
3471    
3472        switch (*ecode)        switch (*ecode)
3473          {          {
# Line 2632  for (;;) Line 3494  for (;;)
3494          break;          break;
3495    
3496          default:               /* No repeat follows */          default:               /* No repeat follows */
3497          if (!match_ref(number, eptr, length, md)) return FALSE;          if (!match_ref(offset, eptr, length, md, ims)) return FALSE;
3498          eptr += length;          eptr += length;
3499          continue;              /* With the main loop */          continue;              /* With the main loop */
3500          }          }
# Line 2648  for (;;) Line 3510  for (;;)
3510    
3511        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3512          {          {
3513          if (!match_ref(number, eptr, length, md)) return FALSE;          if (!match_ref(offset, eptr, length, md, ims)) return FALSE;
3514          eptr += length;          eptr += length;
3515          }          }
3516    
# Line 2663  for (;;) Line 3525  for (;;)
3525          {          {
3526          for (i = min;; i++)          for (i = min;; i++)
3527            {            {
3528            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3529            if (i >= max || !match_ref(number, eptr, length, md))              return TRUE;
3530              if (i >= max || !match_ref(offset, eptr, length, md, ims))
3531              return FALSE;              return FALSE;
3532            eptr += length;            eptr += length;
3533            }            }
# Line 2675  for (;;) Line 3538  for (;;)
3538    
3539        else        else
3540          {          {
3541          uschar *pp = eptr;          const uschar *pp = eptr;
3542          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3543            {            {
3544            if (!match_ref(number, eptr, length, md)) break;            if (!match_ref(offset, eptr, length, md, ims)) break;
3545            eptr += length;            eptr += length;
3546            }            }
3547          while (eptr >= pp)          while (eptr >= pp)
3548            {            {
3549            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3550                return TRUE;
3551            eptr -= length;            eptr -= length;
3552            }            }
3553          return FALSE;          return FALSE;
# Line 2691  for (;;) Line 3555  for (;;)
3555        }        }
3556      /* Control never gets here */      /* Control never gets here */
3557    
3558    
3559    
3560      /* Match a character class, possibly repeatedly. Look past the end of the      /* Match a character class, possibly repeatedly. Look past the end of the
3561      item to see if there is repeat information following. Then obey similar      item to see if there is repeat information following. Then obey similar
3562      code to character type repeats - written out again for speed. If caseless      code to character type repeats - written out again for speed. */
     matching was set at runtime but not at compile time, we have to check both  
     versions of a character. */  
3563    
3564      case OP_CLASS:      case OP_CLASS:
3565        {        {
3566        uschar *data = ecode + 1;  /* Save for matching */        const uschar *data = ecode + 1;  /* Save for matching */
3567        ecode += 33;               /* Advance past the item */        ecode += 33;                     /* Advance past the item */
3568    
3569        switch (*ecode)        switch (*ecode)
3570          {          {
# Line 2727  for (;;) Line 3591  for (;;)
3591          break;          break;
3592    
3593          default:               /* No repeat follows */          default:               /* No repeat follows */
3594          if (eptr >= md->end_subject) return FALSE;          min = max = 1;
3595          c = *eptr++;          break;
         if ((data[c/8] & (1 << (c&7))) != 0) continue;    /* With main loop */  
         if (md->runtime_caseless)  
           {  
           c = pcre_fcc[c];  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  /* With main loop */  
           }  
         return FALSE;  
3596          }          }
3597    
3598        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
# Line 2745  for (;;) Line 3602  for (;;)
3602          if (eptr >= md->end_subject) return FALSE;          if (eptr >= md->end_subject) return FALSE;
3603          c = *eptr++;          c = *eptr++;
3604          if ((data[c/8] & (1 << (c&7))) != 0) continue;          if ((data[c/8] & (1 << (c&7))) != 0) continue;
         if (md->runtime_caseless)  
           {  
           c = pcre_fcc[c];  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  
           }  
3605          return FALSE;          return FALSE;
3606          }          }
3607    
# Line 2765  for (;;) Line 3617  for (;;)
3617          {          {
3618          for (i = min;; i++)          for (i = min;; i++)
3619            {            {
3620            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3621                return TRUE;
3622            if (i >= max || eptr >= md->end_subject) return FALSE;            if (i >= max || eptr >= md->end_subject) return FALSE;
3623            c = *eptr++;            c = *eptr++;
3624            if ((data[c/8] & (1 << (c&7))) != 0) continue;            if ((data[c/8] & (1 << (c&7))) != 0) continue;
           if (md->runtime_caseless)  
             {  
             c = pcre_fcc[c];  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             }  
3625            return FALSE;            return FALSE;
3626            }            }
3627          /* Control never gets here */          /* Control never gets here */
# Line 2783  for (;;) Line 3631  for (;;)
3631    
3632        else        else
3633          {          {
3634          uschar *pp = eptr;          const uschar *pp = eptr;
3635          for (i = min; i < max; eptr++, i++)          for (i = min; i < max; eptr++, i++)
3636            {            {
3637            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3638            c = *eptr;            c = *eptr;
3639            if ((data[c/8] & (1 << (c&7))) != 0) continue;            if ((data[c/8] & (1 << (c&7))) != 0) continue;
           if (md->runtime_caseless)  
             {  
             c = pcre_fcc[c];  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             }  
3640            break;            break;
3641            }            }
3642    
3643          while (eptr >= pp)          while (eptr >= pp)
3644            if (match(eptr--, ecode, offset_top, md)) return TRUE;            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3645                return TRUE;
3646          return FALSE;          return FALSE;
3647          }          }
3648        }        }
# Line 2811  for (;;) Line 3655  for (;;)
3655        register int length = ecode[1];        register int length = ecode[1];
3656        ecode += 2;        ecode += 2;
3657    
3658        #ifdef DEBUG  #ifdef DEBUG    /* Sigh. Some compilers never learn. */
3659        if (eptr >= md->end_subject)        if (eptr >= md->end_subject)
3660          printf("matching subject <null> against pattern ");          printf("matching subject <null> against pattern ");
3661        else        else
# Line 2822  for (;;) Line 3666  for (;;)
3666          }          }
3667        pchars(ecode, length, FALSE, md);        pchars(ecode, length, FALSE, md);
3668        printf("\n");        printf("\n");
3669        #endif  #endif
3670    
3671        if (length > md->end_subject - eptr) return FALSE;        if (length > md->end_subject - eptr) return FALSE;
3672        if (md->caseless)        if ((ims & PCRE_CASELESS) != 0)
3673          {          {
3674          while (length-- > 0) if (pcre_lcc[*ecode++] != pcre_lcc[*eptr++]) return FALSE;          while (length-- > 0)
3675              if (md->lcc[*ecode++] != md->lcc[*eptr++])
3676                return FALSE;
3677          }          }
3678        else        else
3679          {          {
# Line 2879  for (;;) Line 3725  for (;;)
3725      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
3726      characters and work backwards. */      characters and work backwards. */
3727    
3728      #ifdef DEBUG      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
3729      printf("matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
3730    
3731      if (md->caseless)      if ((ims & PCRE_CASELESS) != 0)
3732        {        {
3733        c = pcre_lcc[c];        c = md->lcc[c];
3734        for (i = 1; i <= min; i++) if (c != pcre_lcc[*eptr++]) return FALSE;        for (i = 1; i <= min; i++)
3735            if (c != md->lcc[*eptr++]) return FALSE;
3736        if (min == max) continue;        if (min == max) continue;
3737        if (minimize)        if (minimize)
3738          {          {
3739          for (i = min;; i++)          for (i = min;; i++)
3740            {            {
3741            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3742            if (i >= max || eptr >= md->end_subject || c != pcre_lcc[*eptr++])              return TRUE;
3743              if (i >= max || eptr >= md->end_subject ||
3744                  c != md->lcc[*eptr++])
3745              return FALSE;              return FALSE;
3746            }            }
3747          /* Control never gets here */          /* Control never gets here */
3748          }          }
3749        else        else
3750          {          {
3751          uschar *pp = eptr;          const uschar *pp = eptr;
3752          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3753            {            {
3754            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c != md->lcc[*eptr]) break;
3755            eptr++;            eptr++;
3756            }            }
3757          while (eptr >= pp)          while (eptr >= pp)
3758            if (match(eptr--, ecode, offset_top, md)) return TRUE;            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3759                return TRUE;
3760          return FALSE;          return FALSE;
3761          }          }
3762        /* Control never gets here */        /* Control never gets here */
# Line 2924  for (;;) Line 3772  for (;;)
3772          {          {
3773          for (i = min;; i++)          for (i = min;; i++)
3774            {            {
3775            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3776                return TRUE;
3777            if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;            if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;
3778            }            }
3779          /* Control never gets here */          /* Control never gets here */
3780          }          }
3781        else        else
3782          {          {
3783          uschar *pp = eptr;          const uschar *pp = eptr;
3784          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3785            {            {
3786            if (eptr >= md->end_subject || c != *eptr) break;            if (eptr >= md->end_subject || c != *eptr) break;
3787            eptr++;            eptr++;
3788            }            }
3789          while (eptr >= pp)          while (eptr >= pp)
3790           if (match(eptr--, ecode, offset_top, md)) return TRUE;           if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3791               return TRUE;
3792          return FALSE;          return FALSE;
3793          }          }
3794        }        }
# Line 2947  for (;;) Line 3797  for (;;)
3797      /* Match a negated single character */      /* Match a negated single character */
3798    
3799      case OP_NOT:      case OP_NOT:
3800      if (eptr > md->end_subject) return FALSE;      if (eptr >= md->end_subject) return FALSE;
3801      ecode++;      ecode++;
3802      if (md->caseless)      if ((ims & PCRE_CASELESS) != 0)
3803        {        {
3804        if (pcre_lcc[*ecode++] == pcre_lcc[*eptr++]) return FALSE;        if (md->lcc[*ecode++] == md->lcc[*eptr++]) return FALSE;
3805        }        }
3806      else      else
3807        {        {
# Line 3006  for (;;) Line 3856  for (;;)
3856      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
3857      characters and work backwards. */      characters and work backwards. */
3858    
3859      #ifdef DEBUG      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
3860      printf("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
3861    
3862      if (md->caseless)      if ((ims & PCRE_CASELESS) != 0)
3863        {        {
3864        c = pcre_lcc[c];        c = md->lcc[c];
3865        for (i = 1; i <= min; i++) if (c == pcre_lcc[*eptr++]) return FALSE;        for (i = 1; i <= min; i++)
3866            if (c == md->lcc[*eptr++]) return FALSE;
3867        if (min == max) continue;        if (min == max) continue;
3868        if (minimize)        if (minimize)
3869          {          {
3870          for (i = min;; i++)          for (i = min;; i++)
3871            {            {
3872            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3873            if (i >= max || eptr >= md->end_subject || c == pcre_lcc[*eptr++])              return TRUE;
3874              if (i >= max || eptr >= md->end_subject ||
3875                  c == md->lcc[*eptr++])
3876              return FALSE;              return FALSE;
3877            }            }
3878          /* Control never gets here */          /* Control never gets here */
3879          }          }
3880        else        else
3881          {          {
3882          uschar *pp = eptr;          const uschar *pp = eptr;
3883          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3884            {            {
3885            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c == md->lcc[*eptr]) break;
3886            eptr++;            eptr++;
3887            }            }
3888          while (eptr >= pp)          while (eptr >= pp)
3889            if (match(eptr--, ecode, offset_top, md)) return TRUE;            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3890                return TRUE;
3891          return FALSE;          return FALSE;
3892          }          }
3893        /* Control never gets here */        /* Control never gets here */
# Line 3051  for (;;) Line 3903  for (;;)
3903          {          {
3904          for (i = min;; i++)          for (i = min;; i++)
3905            {            {
3906            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3907                return TRUE;
3908            if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;            if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;
3909            }            }
3910          /* Control never gets here */          /* Control never gets here */
3911          }          }
3912        else        else
3913          {          {
3914          uschar *pp = eptr;          const uschar *pp = eptr;
3915          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3916            {            {
3917            if (eptr >= md->end_subject || c == *eptr) break;            if (eptr >= md->end_subject || c == *eptr) break;
3918            eptr++;            eptr++;
3919            }            }
3920          while (eptr >= pp)          while (eptr >= pp)
3921           if (match(eptr--, ecode, offset_top, md)) return TRUE;           if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3922               return TRUE;
3923          return FALSE;          return FALSE;
3924          }          }
3925        }        }
# Line 3115  for (;;) Line 3969  for (;;)
3969      if (min > 0) switch(ctype)      if (min > 0) switch(ctype)
3970        {        {
3971        case OP_ANY:        case OP_ANY:
3972        if (!md->dotall)        if ((ims & PCRE_DOTALL) == 0)
3973          { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }          { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }
3974        else eptr += min;        else eptr += min;
3975        break;        break;
3976    
3977        case OP_NOT_DIGIT:        case OP_NOT_DIGIT:
3978        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3979          if ((pcre_ctypes[*eptr++] & ctype_digit) != 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_digit) != 0) return FALSE;
3980        break;        break;
3981    
3982        case OP_DIGIT:        case OP_DIGIT:
3983        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3984          if ((pcre_ctypes[*eptr++] & ctype_digit) == 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_digit) == 0) return FALSE;
3985        break;        break;
3986    
3987        case OP_NOT_WHITESPACE:        case OP_NOT_WHITESPACE:
3988        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3989          if ((pcre_ctypes[*eptr++] & ctype_space) != 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_space) != 0) return FALSE;
3990        break;        break;
3991    
3992        case OP_WHITESPACE:        case OP_WHITESPACE:
3993        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3994          if ((pcre_ctypes[*eptr++] & ctype_space) == 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_space) == 0) return FALSE;
3995        break;        break;
3996    
3997        case OP_NOT_WORDCHAR:        case OP_NOT_WORDCHAR:
3998        for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) != 0)        for (i = 1; i <= min; i++)
3999          return FALSE;          if ((md->ctypes[*eptr++] & ctype_word) != 0)
4000              return FALSE;
4001        break;        break;
4002    
4003        case OP_WORDCHAR:        case OP_WORDCHAR:
4004        for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) == 0)        for (i = 1; i <= min; i++)
4005          return FALSE;          if ((md->ctypes[*eptr++] & ctype_word) == 0)
4006              return FALSE;
4007        break;        break;
4008        }        }
4009    
# Line 3156  for (;;) Line 4012  for (;;)
4012      if (min == max) continue;      if (min == max) continue;
4013    
4014      /* If minimizing, we have to test the rest of the pattern before each      /* If minimizing, we have to test the rest of the pattern before each
4015      subsequent match, so inlining isn't much help; just use the function. */      subsequent match. */
4016    
4017      if (minimize)      if (minimize)
4018        {        {
4019        for (i = min;; i++)        for (i = min;; i++)
4020          {          {
4021          if (match(eptr, ecode, offset_top, md)) return TRUE;          if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE;
4022          if (i >= max || eptr >= md->end_subject ||          if (i >= max || eptr >= md->end_subject) return FALSE;
4023            !match_type(ctype, *eptr++, md->dotall))  
4024              return FALSE;          c = *eptr++;
4025            switch(ctype)
4026              {
4027              case OP_ANY:
4028              if ((ims & PCRE_DOTALL) == 0 && c == '\n') return FALSE;
4029              break;
4030    
4031              case OP_NOT_DIGIT:
4032              if ((md->ctypes[c] & ctype_digit) != 0) return FALSE;
4033              break;
4034    
4035              case OP_DIGIT:
4036              if ((md->ctypes[c] & ctype_digit) == 0) return FALSE;
4037              break;
4038    
4039              case OP_NOT_WHITESPACE:
4040              if ((md->ctypes[c] & ctype_space) != 0) return FALSE;
4041              break;
4042    
4043              case OP_WHITESPACE:
4044              if  ((md->ctypes[c] & ctype_space) == 0) return FALSE;
4045              break;
4046    
4047              case OP_NOT_WORDCHAR:
4048              if ((md->ctypes[c] & ctype_word) != 0) return FALSE;
4049              break;
4050    
4051              case OP_WORDCHAR:
4052              if ((md->ctypes[c] & ctype_word) == 0) return FALSE;
4053              break;
4054              }
4055          }          }
4056        /* Control never gets here */        /* Control never gets here */
4057        }        }
# Line 3175  for (;;) Line 4061  for (;;)
4061    
4062      else      else
4063        {        {
4064        uschar *pp = eptr;        const uschar *pp = eptr;
4065        switch(ctype)        switch(ctype)
4066          {          {
4067          case OP_ANY:          case OP_ANY:
4068          if (!md->dotall)          if ((ims & PCRE_DOTALL) == 0)
4069            {            {
4070            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4071              {              {
# Line 3198  for (;;) Line 4084  for (;;)
4084          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
4085          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4086            {            {
4087            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
4088              break;              break;
4089            eptr++;            eptr++;
4090            }            }
# Line 3207  for (;;) Line 4093  for (;;)
4093          case OP_DIGIT:          case OP_DIGIT:
4094          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4095            {            {
4096            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
4097              break;              break;
4098            eptr++;            eptr++;
4099            }            }
# Line 3216  for (;;) Line 4102  for (;;)
4102          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
4103          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4104            {            {
4105            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
4106              break;              break;
4107            eptr++;            eptr++;
4108            }            }
# Line 3225  for (;;) Line 4111  for (;;)
4111          case OP_WHITESPACE:          case OP_WHITESPACE:
4112          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4113            {            {
4114            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
4115              break;              break;
4116            eptr++;            eptr++;
4117            }            }
# Line 3234  for (;;) Line 4120  for (;;)
4120          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
4121          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4122            {            {
4123            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
4124              break;              break;
4125            eptr++;            eptr++;
4126            }            }
# Line 3243  for (;;) Line 4129  for (;;)
4129          case OP_WORDCHAR:          case OP_WORDCHAR:
4130          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4131            {            {
4132            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
4133              break;              break;
4134            eptr++;            eptr++;
4135            }            }
# Line 3251  for (;;) Line 4137  for (;;)
4137          }          }
4138    
4139        while (eptr >= pp)        while (eptr >= pp)
4140          if (match(eptr--, ecode, offset_top, md)) return TRUE;          if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
4141              return TRUE;
4142        return FALSE;        return FALSE;
4143        }        }
4144      /* Control never gets here */      /* Control never gets here */
# Line 3259  for (;;) Line 4146  for (;;)
4146      /* There's been some horrible disaster. */      /* There's been some horrible disaster. */
4147    
4148      default:      default:
4149      #ifdef DEBUG      DPRINTF(("Unknown opcode %d\n", *ecode));
     printf("Unknown opcode %d\n", *ecode);  
     #endif  
4150      md->errorcode = PCRE_ERROR_UNKNOWN_NODE;      md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
4151      return FALSE;      return FALSE;
4152      }      }
# Line 3276  for (;;) Line 4161  for (;;)
4161    
4162    
4163    
4164    
4165  /*************************************************  /*************************************************
4166  *         Execute a Regular Expression           *  *         Execute a Regular Expression           *
4167  *************************************************/  *************************************************/
# Line 3289  Arguments: Line 4175  Arguments:
4175    external_extra  points to "hints" from pcre_study() or is NULL    external_extra  points to "hints" from pcre_study() or is NULL
4176    subject         points to the subject string    subject         points to the subject string
4177    length          length of subject string (may contain binary zeros)    length          length of subject string (may contain binary zeros)
4178      start_offset    where to start in the subject string
4179    options         option bits    options         option bits
4180    offsets         points to a vector of ints to be filled in with offsets    offsets         points to a vector of ints to be filled in with offsets
4181    offsetcount     the number of elements in the vector    offsetcount     the number of elements in the vector
# Line 3301  Returns:          > 0 => success; value Line 4188  Returns:          > 0 => success; value
4188    
4189  int  int
4190  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
4191    const char *subject, int length, int options, int *offsets, int offsetcount)    const char *subject, int length, int start_offset, int options, int *offsets,
4192      int offsetcount)
4193  {  {
4194  int resetcount;  int resetcount, ocount;
 int ocount = offsetcount;  
4195  int first_char = -1;  int first_char = -1;
4196    int req_char = -1;
4197    int req_char2 = -1;
4198    unsigned long int ims = 0;
4199  match_data match_block;  match_data match_block;
4200  uschar *start_bits = NULL;  const uschar *start_bits = NULL;
4201  uschar *start_match = (uschar *)subject;  const uschar *start_match = (const uschar *)subject + start_offset;
4202  uschar *end_subject;  const uschar *end_subject;
4203  real_pcre *re = (real_pcre *)external_re;  const uschar *req_char_ptr = start_match - 1;
4204  real_pcre_extra *extra = (real_pcre_extra *)external_extra;  const real_pcre *re = (const real_pcre *)external_re;
4205    const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
4206    BOOL using_temporary_offsets = FALSE;
4207  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
4208  BOOL startline = (re->options & PCRE_STARTLINE) != 0;  BOOL startline = (re->options & PCRE_STARTLINE) != 0;
4209    
# Line 3321  if (re == NULL || subject == NULL || Line 4213  if (re == NULL || subject == NULL ||
4213     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
4214  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
4215    
4216  match_block.start_subject = (uschar *)subject;  match_block.start_subject = (const uschar *)subject;
4217  match_block.end_subject = match_block.start_subject + length;  match_block.end_subject = match_block.start_subject + length;
4218  end_subject = match_block.end_subject;  end_subject = match_block.end_subject;
4219    
4220  match_block.caseless  = ((re->options | options) & PCRE_CASELESS) != 0;  match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
 match_block.runtime_caseless = match_block.caseless &&  
   (re->options & PCRE_CASELESS) == 0;  
   
 match_block.multiline = ((re->options | options) & PCRE_MULTILINE) != 0;  
 match_block.dotall    = ((re->options | options) & PCRE_DOTALL) != 0;  
 match_block.endonly   = ((re->options | options) & PCRE_DOLLAR_ENDONLY) != 0;  
4221    
4222  match_block.notbol = (options & PCRE_NOTBOL) != 0;  match_block.notbol = (options & PCRE_NOTBOL) != 0;
4223  match_block.noteol = (options & PCRE_NOTEOL) != 0;  match_block.noteol = (options & PCRE_NOTEOL) != 0;
4224    match_block.notempty = (options & PCRE_NOTEMPTY) != 0;
4225    
4226  match_block.errorcode = PCRE_ERROR_NOMATCH;     /* Default error */  match_block.errorcode = PCRE_ERROR_NOMATCH;     /* Default error */
4227    
4228    match_block.lcc = re->tables + lcc_offset;
4229    match_block.ctypes = re->tables + ctypes_offset;
4230    
4231    /* The ims options can vary during the matching as a result of the presence
4232    of (?ims) items in the pattern. They are kept in a local variable so that
4233    restoring at the exit of a group is easy. */
4234    
4235    ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
4236    
4237  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can