/[pcre]/code/trunk/pcre.c
ViewVC logotype

Diff of /code/trunk/pcre.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 15 by nigel, Sat Feb 24 21:38:25 2007 UTC revision 47 by nigel, Sat Feb 24 21:39:29 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1998 University of Cambridge             Copyright (c) 1997-2000 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 25  restrictions: Line 25  restrictions:
25    
26  3. Altered versions must be plainly marked as such, and must not be  3. Altered versions must be plainly marked as such, and must not be
27     misrepresented as being the original software.     misrepresented as being the original software.
28    
29    4. If PCRE is embedded in any software that is released under the GNU
30       General Purpose Licence (GPL), then the terms of that licence shall
31       supersede any condition above with which it is incompatible.
32  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
33  */  */
34    
# Line 33  restrictions: Line 37  restrictions:
37    
38  /* #define DEBUG */  /* #define DEBUG */
39    
40  /* Use a macro for debugging printing, 'cause that eliminates the the use  /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
41  of #ifdef inline, and there are *still* stupid compilers about that don't like  inline, and there are *still* stupid compilers about that don't like indented
42  indented pre-processor statements. I suppose it's only been 10 years... */  pre-processor statements. I suppose it's only been 10 years... */
43    
44  #ifdef DEBUG  #ifdef DEBUG
45  #define DPRINTF(p) printf p  #define DPRINTF(p) printf p
# Line 56  the external pcre header. */ Line 60  the external pcre header. */
60  #endif  #endif
61    
62    
63    /* Number of items on the nested bracket stacks at compile time. This should
64    not be set greater than 200. */
65    
66    #define BRASTACK_SIZE 200
67    
68    
69  /* Min and max values for the common repeats; for the maxima, 0 => infinity */  /* Min and max values for the common repeats; for the maxima, 0 => infinity */
70    
71  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
# Line 66  static const char rep_max[] = { 0, 0, 0, Line 76  static const char rep_max[] = { 0, 0, 0,
76  #ifdef DEBUG  #ifdef DEBUG
77  static const char *OP_names[] = {  static const char *OP_names[] = {
78    "End", "\\A", "\\B", "\\b", "\\D", "\\d",    "End", "\\A", "\\B", "\\b", "\\D", "\\d",
79    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
80    "not",    "Opt", "^", "$", "Any", "chars", "not",
81    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
82    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
83    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
84    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
85    "class", "negclass", "Ref",    "class", "Ref", "Recurse",
86    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
87      "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
88    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
89  };  };
90  #endif  #endif
# Line 93  static const short int escapes[] = { Line 104  static const short int escapes[] = {
104    '`',      7, -ESC_b,      0, -ESC_d,     27,   '\f',      0,   /* ` - g */    '`',      7, -ESC_b,      0, -ESC_d,     27,   '\f',      0,   /* ` - g */
105      0,      0,      0,      0,      0,      0,   '\n',      0,   /* h - o */      0,      0,      0,      0,      0,      0,   '\n',      0,   /* h - o */
106      0,      0,   '\r', -ESC_s,   '\t',      0,      0, -ESC_w,   /* p - w */      0,      0,   '\r', -ESC_s,   '\t',      0,      0, -ESC_w,   /* p - w */
107      0,      0,      0                                            /* x - z */      0,      0, -ESC_z                                            /* x - z */
108    };
109    
110    /* Tables of names of POSIX character classes and their lengths. The list is
111    terminated by a zero length entry. The first three must be alpha, lower, upper,
112    as this is assumed for handling case independence. */
113    
114    static const char *posix_names[] = {
115      "alpha", "lower", "upper",
116      "alnum", "ascii", "cntrl", "digit", "graph",
117      "print", "punct", "space", "word",  "xdigit" };
118    
119    static const uschar posix_name_lengths[] = {
120      5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
121    
122    /* Table of class bit maps for each POSIX class; up to three may be combined
123    to form the class. */
124    
125    static const int posix_class_maps[] = {
126      cbit_lower, cbit_upper, -1,             /* alpha */
127      cbit_lower, -1,         -1,             /* lower */
128      cbit_upper, -1,         -1,             /* upper */
129      cbit_digit, cbit_lower, cbit_upper,     /* alnum */
130      cbit_print, cbit_cntrl, -1,             /* ascii */
131      cbit_cntrl, -1,         -1,             /* cntrl */
132      cbit_digit, -1,         -1,             /* digit */
133      cbit_graph, -1,         -1,             /* graph */
134      cbit_print, -1,         -1,             /* print */
135      cbit_punct, -1,         -1,             /* punct */
136      cbit_space, -1,         -1,             /* space */
137      cbit_word,  -1,         -1,             /* word */
138      cbit_xdigit,-1,         -1              /* xdigit */
139  };  };
140    
141    
142  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
143    
144  static BOOL  static BOOL
145    compile_regex(int, int *, uschar **, const uschar **, const char **);    compile_regex(int, int, int *, uschar **, const uschar **, const char **,
146        BOOL, int, int *, int *, compile_data *);
147    
148    /* Structure for building a chain of data that actually lives on the
149    stack, for holding the values of the subject pointer at the start of each
150    subpattern, so as to detect when an empty string has been matched by a
151    subpattern - to break infinite loops. */
152    
153    typedef struct eptrblock {
154      struct eptrblock *prev;
155      const uschar *saved_eptr;
156    } eptrblock;
157    
158  /* Structure for passing "static" information around between the functions  /* Flag bits for the match() function */
 doing the matching, so that they are thread-safe. */  
159    
160  typedef struct match_data {  #define match_condassert   0x01    /* Called to check a condition assertion */
161    int    errorcode;             /* As it says */  #define match_isgroup      0x02    /* Set if start of bracketed group */
   int   *offset_vector;         /* Offset vector */  
   int    offset_end;            /* One past the end */  
   BOOL   offset_overflow;       /* Set if too many extractions */  
   BOOL   caseless;              /* Case-independent flag */  
   BOOL   runtime_caseless;      /* Caseless forced at run time */  
   BOOL   multiline;             /* Multiline flag */  
   BOOL   notbol;                /* NOTBOL flag */  
   BOOL   noteol;                /* NOTEOL flag */  
   BOOL   dotall;                /* Dot matches any char */  
   BOOL   endonly;               /* Dollar not before final \n */  
   const uschar *start_subject;  /* Start of the subject string */  
   const uschar *end_subject;    /* End of the subject string */  
   jmp_buf fail_env;             /* Environment for longjump() break out */  
   const uschar *end_match_ptr;  /* Subject position at end match */  
   int     end_offset_top;       /* Highwater mark at end of match */  
 } match_data;  
162    
163    
164    
# Line 141  void  (*pcre_free)(void *) = free; Line 178  void  (*pcre_free)(void *) = free;
178    
179    
180  /*************************************************  /*************************************************
181    *             Default character tables           *
182    *************************************************/
183    
184    /* A default set of character tables is included in the PCRE binary. Its source
185    is built by the maketables auxiliary program, which uses the default C ctypes
186    functions, and put in the file chartables.c. These tables are used by PCRE
187    whenever the caller of pcre_compile() does not provide an alternate set of
188    tables. */
189    
190    #include "chartables.c"
191    
192    
193    
194    /*************************************************
195  *          Return version string                 *  *          Return version string                 *
196  *************************************************/  *************************************************/
197    
198    #define STRING(a)  # a
199    #define XSTRING(s) STRING(s)
200    
201  const char *  const char *
202  pcre_version(void)  pcre_version(void)
203  {  {
204  return PCRE_VERSION;  return XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE);
205  }  }
206    
207    
208    
209    
210  /*************************************************  /*************************************************
211  *       Return info about a compiled pattern     *  * (Obsolete) Return info about compiled pattern  *
212  *************************************************/  *************************************************/
213    
214  /* This function picks potentially useful data out of the private  /* This is the original "info" function. It picks potentially useful data out
215  structure.  of the private structure, but its interface was too rigid. It remains for
216    backwards compatibility. The public options are passed back in an int - though
217    the re->options field has been expanded to a long int, all the public options
218    are at the low end of it, and so even on 16-bit systems this will still be OK.
219    Therefore, I haven't changed the API for pcre_info().
220    
221  Arguments:  Arguments:
222    external_re   points to compiled code    external_re   points to compiled code
# Line 167  Arguments: Line 225  Arguments:
225                  or -1 if multiline and all branches start ^,                  or -1 if multiline and all branches start ^,
226                  or -2 otherwise                  or -2 otherwise
227    
228  Returns:        number of identifying extraction brackets  Returns:        number of capturing subpatterns
229                  or negative values on error                  or negative values on error
230  */  */
231    
# Line 177  pcre_info(const pcre *external_re, int * Line 235  pcre_info(const pcre *external_re, int *
235  const real_pcre *re = (const real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
236  if (re == NULL) return PCRE_ERROR_NULL;  if (re == NULL) return PCRE_ERROR_NULL;
237  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
238  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);  if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);
239  if (first_char != NULL)  if (first_char != NULL)
240    *first_char = ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :    *first_char = ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :
241       ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;       ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
# Line 186  return re->top_bracket; Line 244  return re->top_bracket;
244    
245    
246    
247    /*************************************************
248    *        Return info about compiled pattern      *
249    *************************************************/
250    
251    /* This is a newer "info" function which has an extensible interface so
252    that additional items can be added compatibly.
253    
254    Arguments:
255      external_re      points to compiled code
256      external_study   points to study data, or NULL
257      what             what information is required
258      where            where to put the information
259    
260    Returns:           0 if data returned, negative on error
261    */
262    
263    int
264    pcre_fullinfo(const pcre *external_re, const pcre_extra *study_data, int what,
265      void *where)
266    {
267    const real_pcre *re = (const real_pcre *)external_re;
268    const real_pcre_extra *study = (const real_pcre_extra *)study_data;
269    
270    if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
271    if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
272    
273    switch (what)
274      {
275      case PCRE_INFO_OPTIONS:
276      *((unsigned long int *)where) = re->options & PUBLIC_OPTIONS;
277      break;
278    
279      case PCRE_INFO_SIZE:
280      *((size_t *)where) = re->size;
281      break;
282    
283      case PCRE_INFO_CAPTURECOUNT:
284      *((int *)where) = re->top_bracket;
285      break;
286    
287      case PCRE_INFO_BACKREFMAX:
288      *((int *)where) = re->top_backref;
289      break;
290    
291      case PCRE_INFO_FIRSTCHAR:
292      *((int *)where) =
293        ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :
294        ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
295      break;
296    
297      case PCRE_INFO_FIRSTTABLE:
298      *((const uschar **)where) =
299        (study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)?
300          study->start_bits : NULL;
301      break;
302    
303      case PCRE_INFO_LASTLITERAL:
304      *((int *)where) =
305        ((re->options & PCRE_REQCHSET) != 0)? re->req_char : -1;
306      break;
307    
308      default: return PCRE_ERROR_BADOPTION;
309      }
310    
311    return 0;
312    }
313    
314    
315    
316  #ifdef DEBUG  #ifdef DEBUG
317  /*************************************************  /*************************************************
# Line 218  while (length-- > 0) Line 344  while (length-- > 0)
344    
345    
346  /*************************************************  /*************************************************
 *         Check subpattern for empty operand     *  
 *************************************************/  
   
 /* This function checks a bracketed subpattern to see if any of the paths  
 through it could match an empty string. This is used to diagnose an error if  
 such a subpattern is followed by a quantifier with an unlimited upper bound.  
   
 Argument:  
   code      points to the opening bracket  
   
 Returns:    TRUE or FALSE  
 */  
   
 static BOOL  
 could_be_empty(uschar *code)  
 {  
 do {  
   uschar *cc = code + 3;  
   
   /* Scan along the opcodes for this branch; as soon as we find something  
   that matches a non-empty string, break out and advance to test the next  
   branch. If we get to the end of the branch, return TRUE for the whole  
   sub-expression. */  
   
   for (;;)  
     {  
     /* Test an embedded subpattern; if it could not be empty, break the  
     loop. Otherwise carry on in the branch. */  
   
     if ((int)(*cc) >= OP_BRA || (int)(*cc) == OP_ONCE)  
       {  
       if (!could_be_empty(cc)) break;  
       do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);  
       cc += 3;  
       }  
   
     else switch (*cc)  
       {  
       /* Reached end of a branch: the subpattern may match the empty string */  
   
       case OP_ALT:  
       case OP_KET:  
       case OP_KETRMAX:  
       case OP_KETRMIN:  
       return TRUE;  
   
       /* Skip over assertive subpatterns */  
   
       case OP_ASSERT:  
       case OP_ASSERT_NOT:  
       do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);  
       cc += 3;  
       break;  
   
       /* Skip over things that don't match chars */  
   
       case OP_SOD:  
       case OP_EOD:  
       case OP_CIRC:  
       case OP_DOLL:  
       case OP_BRAZERO:  
       case OP_BRAMINZERO:  
       case OP_NOT_WORD_BOUNDARY:  
       case OP_WORD_BOUNDARY:  
       cc++;  
       break;  
   
       /* Skip over simple repeats with zero lower bound */  
   
       case OP_STAR:  
       case OP_MINSTAR:  
       case OP_QUERY:  
       case OP_MINQUERY:  
       case OP_NOTSTAR:  
       case OP_NOTMINSTAR:  
       case OP_NOTQUERY:  
       case OP_NOTMINQUERY:  
       case OP_TYPESTAR:  
       case OP_TYPEMINSTAR:  
       case OP_TYPEQUERY:  
       case OP_TYPEMINQUERY:  
       cc += 2;  
       break;  
   
       /* Skip over UPTOs (lower bound is zero) */  
   
       case OP_UPTO:  
       case OP_MINUPTO:  
       case OP_TYPEUPTO:  
       case OP_TYPEMINUPTO:  
       cc += 4;  
       break;  
   
       /* Check a class or a back reference for a zero minimum */  
   
       case OP_CLASS:  
       case OP_NEGCLASS:  
       case OP_REF:  
       cc += (*cc == OP_REF)? 2 : 33;  
   
       switch (*cc)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         cc++;  
         break;  
   
         case OP_CRRANGE:  
         case OP_CRMINRANGE:  
         if ((cc[1] << 8) + cc[2] != 0) goto NEXT_BRANCH;  
         cc += 3;  
         break;  
   
         default:  
         goto NEXT_BRANCH;  
         }  
       break;  
   
       /* Anything else matches at least one character */  
   
       default:  
       goto NEXT_BRANCH;  
       }  
     }  
   
   NEXT_BRANCH:  
   code += (code[1] << 8) + code[2];  
   }  
 while (*code == OP_ALT);  
   
 /* No branches match the empty string */  
   
 return FALSE;  
 }  
   
   
   
 /*************************************************  
347  *            Handle escapes                      *  *            Handle escapes                      *
348  *************************************************/  *************************************************/
349    
# Line 373  Arguments: Line 359  Arguments:
359    bracount   number of previous extracting brackets    bracount   number of previous extracting brackets
360    options    the options bits    options    the options bits
361    isclass    TRUE if inside a character class    isclass    TRUE if inside a character class
362      cd         pointer to char tables block
363    
364  Returns:     zero or positive => a data character  Returns:     zero or positive => a data character
365               negative => a special escape sequence               negative => a special escape sequence
# Line 381  Returns:     zero or positive => a data Line 368  Returns:     zero or positive => a data
368    
369  static int  static int
370  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
371    int options, BOOL isclass)    int options, BOOL isclass, compile_data *cd)
372  {  {
373  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
374  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */  int c, i;
 int i;  
375    
376    c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */
377  if (c == 0) *errorptr = ERR1;  if (c == 0) *errorptr = ERR1;
378    
379  /* Digits or letters may have special meaning; all others are literals. */  /* Digits or letters may have special meaning; all others are literals. */
# Line 424  else Line 411  else
411        {        {
412        oldptr = ptr;        oldptr = ptr;
413        c -= '0';        c -= '0';
414        while ((pcre_ctypes[ptr[1]] & ctype_digit) != 0)        while ((cd->ctypes[ptr[1]] & ctype_digit) != 0)
415          c = c * 10 + *(++ptr) - '0';          c = c * 10 + *(++ptr) - '0';
416        if (c < 10 || c <= bracount)        if (c < 10 || c <= bracount)
417          {          {
# Line 450  else Line 437  else
437    
438      case '0':      case '0':
439      c -= '0';      c -= '0';
440      while(i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_digit) != 0 &&      while(i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 &&
441        ptr[1] != '8' && ptr[1] != '9')        ptr[1] != '8' && ptr[1] != '9')
442          c = c * 8 + *(++ptr) - '0';          c = c * 8 + *(++ptr) - '0';
443      break;      break;
# Line 459  else Line 446  else
446    
447      case 'x':      case 'x':
448      c = 0;      c = 0;
449      while (i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_xdigit) != 0)      while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0)
450        {        {
451        ptr++;        ptr++;
452        c = c * 16 + pcre_lcc[*ptr] -        c = c * 16 + cd->lcc[*ptr] -
453          (((pcre_ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');          (((cd->ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');
454        }        }
455      break;      break;
456    
# Line 477  else Line 464  else
464    
465      /* A letter is upper-cased; then the 0x40 bit is flipped */      /* A letter is upper-cased; then the 0x40 bit is flipped */
466    
467      if (c >= 'a' && c <= 'z') c = pcre_fcc[c];      if (c >= 'a' && c <= 'z') c = cd->fcc[c];
468      c ^= 0x40;      c ^= 0x40;
469      break;      break;
470    
471      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
472      other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,      other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,
473      for Perl compatibility, it is a literal. */      for Perl compatibility, it is a literal. This code looks a bit odd, but
474        there used to be some cases other than the default, and there may be again
475        in future, so I haven't "optimized" it. */
476    
477      default:      default:
478      if ((options & PCRE_EXTRA) != 0) switch(c)      if ((options & PCRE_EXTRA) != 0) switch(c)
479        {        {
       case 'X':  
       c = -ESC_X;      /* This could be a lookup if it ever got into Perl */  
       break;  
   
480        default:        default:
481        *errorptr = ERR3;        *errorptr = ERR3;
482        break;        break;
# Line 517  where the ddds are digits. Line 502  where the ddds are digits.
502    
503  Arguments:  Arguments:
504    p         pointer to the first char after '{'    p         pointer to the first char after '{'
505      cd        pointer to char tables block
506    
507  Returns:    TRUE or FALSE  Returns:    TRUE or FALSE
508  */  */
509    
510  static BOOL  static BOOL
511  is_counted_repeat(const uschar *p)  is_counted_repeat(const uschar *p, compile_data *cd)
512  {  {
513  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
514  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
515  if (*p == '}') return TRUE;  if (*p == '}') return TRUE;
516    
517  if (*p++ != ',') return FALSE;  if (*p++ != ',') return FALSE;
518  if (*p == '}') return TRUE;  if (*p == '}') return TRUE;
519    
520  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
521  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
522  return (*p == '}');  return (*p == '}');
523  }  }
524    
# Line 552  Arguments: Line 538  Arguments:
538    maxp       pointer to int for max    maxp       pointer to int for max
539               returned as -1 if no max               returned as -1 if no max
540    errorptr   points to pointer to error message    errorptr   points to pointer to error message
541      cd         pointer to character tables block
542    
543  Returns:     pointer to '}' on success;  Returns:     pointer to '}' on success;
544               current ptr on error, with errorptr set               current ptr on error, with errorptr set
545  */  */
546    
547  static const uschar *  static const uschar *
548  read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr)  read_repeat_counts(const uschar *p, int *minp, int *maxp,
549      const char **errorptr, compile_data *cd)
550  {  {
551  int min = 0;  int min = 0;
552  int max = -1;  int max = -1;
553    
554  while ((pcre_ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';  while ((cd->ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';
555    
556  if (*p == '}') max = min; else  if (*p == '}') max = min; else
557    {    {
558    if (*(++p) != '}')    if (*(++p) != '}')
559      {      {
560      max = 0;      max = 0;
561      while((pcre_ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';      while((cd->ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';
562      if (max < min)      if (max < min)
563        {        {
564        *errorptr = ERR4;        *errorptr = ERR4;
# Line 595  return p; Line 583  return p;
583    
584    
585  /*************************************************  /*************************************************
586    *        Find the fixed length of a pattern      *
587    *************************************************/
588    
589    /* Scan a pattern and compute the fixed length of subject that will match it,
590    if the length is fixed. This is needed for dealing with backward assertions.
591    
592    Arguments:
593      code     points to the start of the pattern (the bracket)
594    
595    Returns:   the fixed length, or -1 if there is no fixed length
596    */
597    
598    static int
599    find_fixedlength(uschar *code)
600    {
601    int length = -1;
602    
603    register int branchlength = 0;
604    register uschar *cc = code + 3;
605    
606    /* Scan along the opcodes for this branch. If we get to the end of the
607    branch, check the length against that of the other branches. */
608    
609    for (;;)
610      {
611      int d;
612      register int op = *cc;
613      if (op >= OP_BRA) op = OP_BRA;
614    
615      switch (op)
616        {
617        case OP_BRA:
618        case OP_ONCE:
619        case OP_COND:
620        d = find_fixedlength(cc);
621        if (d < 0) return -1;
622        branchlength += d;
623        do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
624        cc += 3;
625        break;
626    
627        /* Reached end of a branch; if it's a ket it is the end of a nested
628        call. If it's ALT it is an alternation in a nested call. If it is
629        END it's the end of the outer call. All can be handled by the same code. */
630    
631        case OP_ALT:
632        case OP_KET:
633        case OP_KETRMAX:
634        case OP_KETRMIN:
635        case OP_END:
636        if (length < 0) length = branchlength;
637          else if (length != branchlength) return -1;
638        if (*cc != OP_ALT) return length;
639        cc += 3;
640        branchlength = 0;
641        break;
642    
643        /* Skip over assertive subpatterns */
644    
645        case OP_ASSERT:
646        case OP_ASSERT_NOT:
647        case OP_ASSERTBACK:
648        case OP_ASSERTBACK_NOT:
649        do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
650        cc += 3;
651        break;
652    
653        /* Skip over things that don't match chars */
654    
655        case OP_REVERSE:
656        cc++;
657        /* Fall through */
658    
659        case OP_CREF:
660        case OP_OPT:
661        cc++;
662        /* Fall through */
663    
664        case OP_SOD:
665        case OP_EOD:
666        case OP_EODN:
667        case OP_CIRC:
668        case OP_DOLL:
669        case OP_NOT_WORD_BOUNDARY:
670        case OP_WORD_BOUNDARY:
671        cc++;
672        break;
673    
674        /* Handle char strings */
675    
676        case OP_CHARS:
677        branchlength += *(++cc);
678        cc += *cc + 1;
679        break;
680    
681        /* Handle exact repetitions */
682    
683        case OP_EXACT:
684        case OP_TYPEEXACT:
685        branchlength += (cc[1] << 8) + cc[2];
686        cc += 4;
687        break;
688    
689        /* Handle single-char matchers */
690    
691        case OP_NOT_DIGIT:
692        case OP_DIGIT:
693        case OP_NOT_WHITESPACE:
694        case OP_WHITESPACE:
695        case OP_NOT_WORDCHAR:
696        case OP_WORDCHAR:
697        case OP_ANY:
698        branchlength++;
699        cc++;
700        break;
701    
702    
703        /* Check a class for variable quantification */
704    
705        case OP_CLASS:
706        cc += (*cc == OP_REF)? 2 : 33;
707    
708        switch (*cc)
709          {
710          case OP_CRSTAR:
711          case OP_CRMINSTAR:
712          case OP_CRQUERY:
713          case OP_CRMINQUERY:
714          return -1;
715    
716          case OP_CRRANGE:
717          case OP_CRMINRANGE:
718          if ((cc[1] << 8) + cc[2] != (cc[3] << 8) + cc[4]) return -1;
719          branchlength += (cc[1] << 8) + cc[2];
720          cc += 5;
721          break;
722    
723          default:
724          branchlength++;
725          }
726        break;
727    
728        /* Anything else is variable length */
729    
730        default:
731        return -1;
732        }
733      }
734    /* Control never gets here */
735    }
736    
737    
738    
739    
740    /*************************************************
741    *           Check for POSIX class syntax         *
742    *************************************************/
743    
744    /* This function is called when the sequence "[:" or "[." or "[=" is
745    encountered in a character class. It checks whether this is followed by an
746    optional ^ and then a sequence of letters, terminated by a matching ":]" or
747    ".]" or "=]".
748    
749    Arguments:
750      ptr      pointer to the initial [
751      endptr   where to return the end pointer
752      cd       pointer to compile data
753    
754    Returns:   TRUE or FALSE
755    */
756    
757    static BOOL
758    check_posix_syntax(const uschar *ptr, const uschar **endptr, compile_data *cd)
759    {
760    int terminator;          /* Don't combine these lines; the Solaris cc */
761    terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
762    if (*(++ptr) == '^') ptr++;
763    while ((cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;
764    if (*ptr == terminator && ptr[1] == ']')
765      {
766      *endptr = ptr;
767      return TRUE;
768      }
769    return FALSE;
770    }
771    
772    
773    
774    
775    /*************************************************
776    *          Check POSIX class name                *
777    *************************************************/
778    
779    /* This function is called to check the name given in a POSIX-style class entry
780    such as [:alnum:].
781    
782    Arguments:
783      ptr        points to the first letter
784      len        the length of the name
785    
786    Returns:     a value representing the name, or -1 if unknown
787    */
788    
789    static int
790    check_posix_name(const uschar *ptr, int len)
791    {
792    register int yield = 0;
793    while (posix_name_lengths[yield] != 0)
794      {
795      if (len == posix_name_lengths[yield] &&
796        strncmp((const char *)ptr, posix_names[yield], len) == 0) return yield;
797      yield++;
798      }
799    return -1;
800    }
801    
802    
803    
804    
805    /*************************************************
806  *           Compile one branch                   *  *           Compile one branch                   *
807  *************************************************/  *************************************************/
808    
809  /* Scan the pattern, compiling it into the code vector.  /* Scan the pattern, compiling it into the code vector.
810    
811  Arguments:  Arguments:
812    options    the option bits    options      the option bits
813    bracket    points to number of brackets used    brackets     points to number of brackets used
814    code       points to the pointer to the current code point    code         points to the pointer to the current code point
815    ptrptr     points to the current pattern pointer    ptrptr       points to the current pattern pointer
816    errorptr   points to pointer to error message    errorptr     points to pointer to error message
817      optchanged   set to the value of the last OP_OPT item compiled
818      reqchar      set to the last literal character required, else -1
819      countlits    set to count of mandatory literal characters
820      cd           contains pointers to tables
821    
822  Returns:     TRUE on success  Returns:       TRUE on success
823               FALSE, with *errorptr set on error                 FALSE, with *errorptr set on error
824  */  */
825    
826  static BOOL  static BOOL
827  compile_branch(int options, int *brackets, uschar **codeptr,  compile_branch(int options, int *brackets, uschar **codeptr,
828    const uschar **ptrptr, const char **errorptr)    const uschar **ptrptr, const char **errorptr, int *optchanged,
829      int *reqchar, int *countlits, compile_data *cd)
830  {  {
831  int repeat_type, op_type;  int repeat_type, op_type;
832  int repeat_min, repeat_max;  int repeat_min, repeat_max;
833  int bravalue, length;  int bravalue, length;
834    int greedy_default, greedy_non_default;
835    int prevreqchar;
836    int condcount = 0;
837    int subcountlits = 0;
838  register int c;  register int c;
839  register uschar *code = *codeptr;  register uschar *code = *codeptr;
840    uschar *tempcode;
841  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
842  const uschar *oldptr;  const uschar *tempptr;
843  uschar *previous = NULL;  uschar *previous = NULL;
844  uschar class[32];  uschar class[32];
845    
846    /* Set up the default and non-default settings for greediness */
847    
848    greedy_default = ((options & PCRE_UNGREEDY) != 0);
849    greedy_non_default = greedy_default ^ 1;
850    
851    /* Initialize no required char, and count of literals */
852    
853    *reqchar = prevreqchar = -1;
854    *countlits = 0;
855    
856  /* Switch on next character until the end of the branch */  /* Switch on next character until the end of the branch */
857    
858  for (;; ptr++)  for (;; ptr++)
859    {    {
860    BOOL negate_class;    BOOL negate_class;
861    int  class_charcount;    int class_charcount;
862    int  class_lastchar;    int class_lastchar;
863      int newoptions;
864      int condref;
865      int subreqchar;
866    
867    c = *ptr;    c = *ptr;
868    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
869      {      {
870      if ((pcre_ctypes[c] & ctype_space) != 0) continue;      if ((cd->ctypes[c] & ctype_space) != 0) continue;
871      if (c == '#')      if (c == '#')
872        {        {
873        while ((c = *(++ptr)) != 0 && c != '\n');        /* The space before the ; is to avoid a warning on a silly compiler
874          on the Macintosh. */
875          while ((c = *(++ptr)) != 0 && c != '\n') ;
876        continue;        continue;
877        }        }
878      }      }
# Line 679  for (;; ptr++) Line 912  for (;; ptr++)
912    
913      case '[':      case '[':
914      previous = code;      previous = code;
915        *code++ = OP_CLASS;
916    
917      /* If the first character is '^', set the negation flag, and use a      /* If the first character is '^', set the negation flag and skip it. */
     different opcode. This only matters if caseless matching is specified at  
     runtime. */  
918    
919      if ((c = *(++ptr)) == '^')      if ((c = *(++ptr)) == '^')
920        {        {
921        negate_class = TRUE;        negate_class = TRUE;
       *code++ = OP_NEGCLASS;  
922        c = *(++ptr);        c = *(++ptr);
923        }        }
924      else      else negate_class = FALSE;
       {  
       negate_class = FALSE;  
       *code++ = OP_CLASS;  
       }  
925    
926      /* Keep a count of chars so that we can optimize the case of just a single      /* Keep a count of chars so that we can optimize the case of just a single
927      character. */      character. */
# Line 720  for (;; ptr++) Line 947  for (;; ptr++)
947          goto FAILED;          goto FAILED;
948          }          }
949    
950          /* Handle POSIX class names. Perl allows a negation extension of the
951          form [:^name]. A square bracket that doesn't match the syntax is
952          treated as a literal. We also recognize the POSIX constructions
953          [.ch.] and [=ch=] ("collating elements") and fault them, as Perl
954          5.6 does. */
955    
956          if (c == '[' &&
957              (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
958              check_posix_syntax(ptr, &tempptr, cd))
959            {
960            BOOL local_negate = FALSE;
961            int posix_class, i;
962            register const uschar *cbits = cd->cbits;
963    
964            if (ptr[1] != ':')
965              {
966              *errorptr = ERR31;
967              goto FAILED;
968              }
969    
970            ptr += 2;
971            if (*ptr == '^')
972              {
973              local_negate = TRUE;
974              ptr++;
975              }
976    
977            posix_class = check_posix_name(ptr, tempptr - ptr);
978            if (posix_class < 0)
979              {
980              *errorptr = ERR30;
981              goto FAILED;
982              }
983    
984            /* If matching is caseless, upper and lower are converted to
985            alpha. This relies on the fact that the class table starts with
986            alpha, lower, upper as the first 3 entries. */
987    
988            if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)
989              posix_class = 0;
990    
991            /* Or into the map we are building up to 3 of the static class
992            tables, or their negations. */
993    
994            posix_class *= 3;
995            for (i = 0; i < 3; i++)
996              {
997              int taboffset = posix_class_maps[posix_class + i];
998              if (taboffset < 0) break;
999              if (local_negate)
1000                for (c = 0; c < 32; c++) class[c] |= ~cbits[c+taboffset];
1001              else
1002                for (c = 0; c < 32; c++) class[c] |= cbits[c+taboffset];
1003              }
1004    
1005            ptr = tempptr + 1;
1006            class_charcount = 10;  /* Set > 1; assumes more than 1 per class */
1007            continue;
1008            }
1009    
1010        /* Backslash may introduce a single character, or it may introduce one        /* Backslash may introduce a single character, or it may introduce one
1011        of the specials, which just set a flag. Escaped items are checked for        of the specials, which just set a flag. Escaped items are checked for
1012        validity in the pre-compiling pass. The sequence \b is a special case.        validity in the pre-compiling pass. The sequence \b is a special case.
# Line 730  for (;; ptr++) Line 1017  for (;; ptr++)
1017    
1018        if (c == '\\')        if (c == '\\')
1019          {          {
1020          c = check_escape(&ptr, errorptr, *brackets, options, TRUE);          c = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
1021          if (-c == ESC_b) c = '\b';          if (-c == ESC_b) c = '\b';
1022          else if (c < 0)          else if (c < 0)
1023            {            {
1024              register const uschar *cbits = cd->cbits;
1025            class_charcount = 10;            class_charcount = 10;
1026            switch (-c)            switch (-c)
1027              {              {
1028              case ESC_d:              case ESC_d:
1029              for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_digit];              for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_digit];
1030              continue;              continue;
1031    
1032              case ESC_D:              case ESC_D:
1033              for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_digit];              for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_digit];
1034              continue;              continue;
1035    
1036              case ESC_w:              case ESC_w:
1037              for (c = 0; c < 32; c++)              for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_word];
               class[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]);  
1038              continue;              continue;
1039    
1040              case ESC_W:              case ESC_W:
1041              for (c = 0; c < 32; c++)              for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_word];
               class[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]);  
1042              continue;              continue;
1043    
1044              case ESC_s:              case ESC_s:
1045              for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_space];              for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_space];
1046              continue;              continue;
1047    
1048              case ESC_S:              case ESC_S:
1049              for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_space];              for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_space];
1050              continue;              continue;
1051    
1052              default:              default:
# Line 792  for (;; ptr++) Line 1078  for (;; ptr++)
1078    
1079          if (d == '\\')          if (d == '\\')
1080            {            {
1081            d = check_escape(&ptr, errorptr, *brackets, options, TRUE);            d = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
1082            if (d < 0)            if (d < 0)
1083              {              {
1084              if (d == -ESC_b) d = '\b'; else              if (d == -ESC_b) d = '\b'; else
# Line 814  for (;; ptr++) Line 1100  for (;; ptr++)
1100            class[c/8] |= (1 << (c&7));            class[c/8] |= (1 << (c&7));
1101            if ((options & PCRE_CASELESS) != 0)            if ((options & PCRE_CASELESS) != 0)
1102              {              {
1103              int uc = pcre_fcc[c];           /* flip case */              int uc = cd->fcc[c];           /* flip case */
1104              class[uc/8] |= (1 << (uc&7));              class[uc/8] |= (1 << (uc&7));
1105              }              }
1106            class_charcount++;                /* in case a one-char range */            class_charcount++;                /* in case a one-char range */
# Line 829  for (;; ptr++) Line 1115  for (;; ptr++)
1115        class [c/8] |= (1 << (c&7));        class [c/8] |= (1 << (c&7));
1116        if ((options & PCRE_CASELESS) != 0)        if ((options & PCRE_CASELESS) != 0)
1117          {          {
1118          c = pcre_fcc[c];   /* flip case */          c = cd->fcc[c];   /* flip case */
1119          class[c/8] |= (1 << (c&7));          class[c/8] |= (1 << (c&7));
1120          }          }
1121        class_charcount++;        class_charcount++;
# Line 876  for (;; ptr++) Line 1162  for (;; ptr++)
1162      /* Various kinds of repeat */      /* Various kinds of repeat */
1163    
1164      case '{':      case '{':
1165      if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;      if (!is_counted_repeat(ptr+1, cd)) goto NORMAL_CHAR;
1166      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr);      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr, cd);
1167      if (*errorptr != NULL) goto FAILED;      if (*errorptr != NULL) goto FAILED;
1168      goto REPEAT;      goto REPEAT;
1169    
# Line 902  for (;; ptr++) Line 1188  for (;; ptr++)
1188        goto FAILED;        goto FAILED;
1189        }        }
1190    
1191      /* If the next character is '?' this is a minimizing repeat. Advance to the      /* If the next character is '?' this is a minimizing repeat, by default,
1192        but if PCRE_UNGREEDY is set, it works the other way round. Advance to the
1193      next character. */      next character. */
1194    
1195      if (ptr[1] == '?') { repeat_type = 1; ptr++; } else repeat_type = 0;      if (ptr[1] == '?')
1196          { repeat_type = greedy_non_default; ptr++; }
1197      /* If the maximum is zero then the minimum must also be zero; Perl allows      else repeat_type = greedy_default;
     this case, so we do too - by simply omitting the item altogether. */  
   
     if (repeat_max == 0) code = previous;  
1198    
1199      /* If previous was a string of characters, chop off the last one and use it      /* If previous was a string of characters, chop off the last one and use it
1200      as the subject of the repeat. If there was only one character, we can      as the subject of the repeat. If there was only one character, we can
1201      abolish the previous item altogether. */      abolish the previous item altogether. A repeat with a zero minimum wipes
1202        out any reqchar setting, backing up to the previous value. We must also
1203        adjust the countlits value. */
1204    
1205      else if (*previous == OP_CHARS)      if (*previous == OP_CHARS)
1206        {        {
1207        int len = previous[1];        int len = previous[1];
1208    
1209          if (repeat_min == 0) *reqchar = prevreqchar;
1210          *countlits += repeat_min - 1;
1211    
1212        if (len == 1)        if (len == 1)
1213          {          {
1214          c = previous[2];          c = previous[2];
# Line 950  for (;; ptr++) Line 1240  for (;; ptr++)
1240      create a suitable repeat item. The code is shared with single-character      create a suitable repeat item. The code is shared with single-character
1241      repeats by adding a suitable offset into repeat_type. */      repeats by adding a suitable offset into repeat_type. */
1242    
1243      else if ((int)*previous < OP_EOD || *previous == OP_ANY)      else if ((int)*previous < OP_EODN || *previous == OP_ANY)
1244        {        {
1245        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */
1246        c = *previous;        c = *previous;
1247        code = previous;        code = previous;
1248    
1249        OUTPUT_SINGLE_REPEAT:        OUTPUT_SINGLE_REPEAT:
1250        repeat_type += op_type;      /* Combine both values for many cases */  
1251          /* If the maximum is zero then the minimum must also be zero; Perl allows
1252          this case, so we do too - by simply omitting the item altogether. */
1253    
1254          if (repeat_max == 0) goto END_REPEAT;
1255    
1256          /* Combine the op_type with the repeat_type */
1257    
1258          repeat_type += op_type;
1259    
1260        /* A minimum of zero is handled either as the special case * or ?, or as        /* A minimum of zero is handled either as the special case * or ?, or as
1261        an UPTO, with the maximum given. */        an UPTO, with the maximum given. */
# Line 994  for (;; ptr++) Line 1292  for (;; ptr++)
1292          /* If the mininum is 1 and the previous item was a character string,          /* If the mininum is 1 and the previous item was a character string,
1293          we either have to put back the item that got cancelled if the string          we either have to put back the item that got cancelled if the string
1294          length was 1, or add the character back onto the end of a longer          length was 1, or add the character back onto the end of a longer
1295          string. For a character type nothing need be done; it will just get put          string. For a character type nothing need be done; it will just get
1296          back naturally. */          put back naturally. Note that the final character is always going to
1297            get added below. */
1298    
1299          else if (*previous == OP_CHARS)          else if (*previous == OP_CHARS)
1300            {            {
1301            if (code == previous) code += 2; else previous[1]++;            if (code == previous) code += 2; else previous[1]++;
1302            }            }
1303    
1304            /*  For a single negated character we also have to put back the
1305            item that got cancelled. */
1306    
1307            else if (*previous == OP_NOT) code++;
1308    
1309          /* If the maximum is unlimited, insert an OP_STAR. */          /* If the maximum is unlimited, insert an OP_STAR. */
1310    
1311          if (repeat_max < 0)          if (repeat_max < 0)
# Line 1028  for (;; ptr++) Line 1332  for (;; ptr++)
1332        }        }
1333    
1334      /* If previous was a character class or a back reference, we put the repeat      /* If previous was a character class or a back reference, we put the repeat
1335      stuff after it. */      stuff after it, but just skip the item if the repeat was {0,0}. */
1336    
1337      else if (*previous == OP_CLASS || *previous == OP_NEGCLASS ||      else if (*previous == OP_CLASS || *previous == OP_REF)
              *previous == OP_REF)  
1338        {        {
1339          if (repeat_max == 0)
1340            {
1341            code = previous;
1342            goto END_REPEAT;
1343            }
1344        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
1345          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
1346        else if (repeat_min == 1 && repeat_max == -1)        else if (repeat_min == 1 && repeat_max == -1)
# Line 1051  for (;; ptr++) Line 1359  for (;; ptr++)
1359        }        }
1360    
1361      /* If previous was a bracket group, we may have to replicate it in certain      /* If previous was a bracket group, we may have to replicate it in certain
1362      cases. If the maximum repeat count is unlimited, check that the bracket      cases. */
     group cannot match the empty string, and diagnose an error if it can. */  
1363    
1364      else if ((int)*previous >= OP_BRA)      else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE ||
1365                 (int)*previous == OP_COND)
1366        {        {
1367        int i;        register int i;
1368          int ketoffset = 0;
1369        int len = code - previous;        int len = code - previous;
1370          uschar *bralink = NULL;
1371    
1372        if (repeat_max == -1 && could_be_empty(previous))        /* If the maximum repeat count is unlimited, find the end of the bracket
1373          by scanning through from the start, and compute the offset back to it
1374          from the current code pointer. There may be an OP_OPT setting following
1375          the final KET, so we can't find the end just by going back from the code
1376          pointer. */
1377    
1378          if (repeat_max == -1)
1379            {
1380            register uschar *ket = previous;
1381            do ket += (ket[1] << 8) + ket[2]; while (*ket != OP_KET);
1382            ketoffset = code - ket;
1383            }
1384    
1385          /* The case of a zero minimum is special because of the need to stick
1386          OP_BRAZERO in front of it, and because the group appears once in the
1387          data, whereas in other cases it appears the minimum number of times. For
1388          this reason, it is simplest to treat this case separately, as otherwise
1389          the code gets far too messy. There are several special subcases when the
1390          minimum is zero. */
1391    
1392          if (repeat_min == 0)
1393          {          {
1394          *errorptr = ERR10;          /* If we set up a required char from the bracket, we must back off
1395          goto FAILED;          to the previous value and reset the countlits value too. */
         }  
1396    
1397        /* If the minimum is greater than zero, and the maximum is unlimited or          if (subcountlits > 0)
1398        equal to the minimum, the first copy remains where it is, and is            {
1399        replicated up to the minimum number of times. This case includes the +            *reqchar = prevreqchar;
1400        repeat, but of course no replication is needed in that case. */            *countlits -= subcountlits;
1401              }
1402    
1403        if (repeat_min > 0 && (repeat_max == -1 || repeat_max == repeat_min))          /* If the maximum is also zero, we just omit the group from the output
1404          {          altogether. */
1405          for (i = 1; i < repeat_min; i++)  
1406            if (repeat_max == 0)
1407            {            {
1408            memcpy(code, previous, len);            code = previous;
1409            code += len;            goto END_REPEAT;
1410            }            }
         }  
1411    
1412        /* If the minimum is zero, stick BRAZERO in front of the first copy.          /* If the maximum is 1 or unlimited, we just have to stick in the
1413        Then, if there is a fixed upper limit, replicated up to that many times,          BRAZERO and do no more at this point. */
       sticking BRAZERO in front of all the optional ones. */  
1414    
1415        else          if (repeat_max <= 1)
         {  
         if (repeat_min == 0)  
1416            {            {
1417            memmove(previous+1, previous, len);            memmove(previous+1, previous, len);
1418            code++;            code++;
1419            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
1420            }            }
1421    
1422            /* If the maximum is greater than 1 and limited, we have to replicate
1423            in a nested fashion, sticking OP_BRAZERO before each set of brackets.
1424            The first one has to be handled carefully because it's the original
1425            copy, which has to be moved up. The remainder can be handled by code
1426            that is common with the non-zero minimum case below. We just have to
1427            adjust the value of repeat_max, since one less copy is required. */
1428    
1429            else
1430              {
1431              int offset;
1432              memmove(previous+4, previous, len);
1433              code += 4;
1434              *previous++ = OP_BRAZERO + repeat_type;
1435              *previous++ = OP_BRA;
1436    
1437              /* We chain together the bracket offset fields that have to be
1438              filled in later when the ends of the brackets are reached. */
1439    
1440              offset = (bralink == NULL)? 0 : previous - bralink;
1441              bralink = previous;
1442              *previous++ = offset >> 8;
1443              *previous++ = offset & 255;
1444              }
1445    
1446            repeat_max--;
1447            }
1448    
1449          /* If the minimum is greater than zero, replicate the group as many
1450          times as necessary, and adjust the maximum to the number of subsequent
1451          copies that we need. */
1452    
1453          else
1454            {
1455          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1456            {            {
1457            memcpy(code, previous, len);            memcpy(code, previous, len);
1458            code += len;            code += len;
1459            }            }
1460            if (repeat_max > 0) repeat_max -= repeat_min;
1461            }
1462    
1463          /* This code is common to both the zero and non-zero minimum cases. If
1464          the maximum is limited, it replicates the group in a nested fashion,
1465          remembering the bracket starts on a stack. In the case of a zero minimum,
1466          the first one was set up above. In all cases the repeat_max now specifies
1467          the number of additional copies needed. */
1468    
1469          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)        if (repeat_max >= 0)
1470            {
1471            for (i = repeat_max - 1; i >= 0; i--)
1472            {            {
1473            *code++ = OP_BRAZERO + repeat_type;            *code++ = OP_BRAZERO + repeat_type;
1474    
1475              /* All but the final copy start a new nesting, maintaining the
1476              chain of brackets outstanding. */
1477    
1478              if (i != 0)
1479                {
1480                int offset;
1481                *code++ = OP_BRA;
1482                offset = (bralink == NULL)? 0 : code - bralink;
1483                bralink = code;
1484                *code++ = offset >> 8;
1485                *code++ = offset & 255;
1486                }
1487    
1488            memcpy(code, previous, len);            memcpy(code, previous, len);
1489            code += len;            code += len;
1490            }            }
1491    
1492            /* Now chain through the pending brackets, and fill in their length
1493            fields (which are holding the chain links pro tem). */
1494    
1495            while (bralink != NULL)
1496              {
1497              int oldlinkoffset;
1498              int offset = code - bralink + 1;
1499              uschar *bra = code - offset;
1500              oldlinkoffset = (bra[1] << 8) + bra[2];
1501              bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
1502              *code++ = OP_KET;
1503              *code++ = bra[1] = offset >> 8;
1504              *code++ = bra[2] = (offset & 255);
1505              }
1506          }          }
1507    
1508        /* If the maximum is unlimited, set a repeater in the final copy. */        /* If the maximum is unlimited, set a repeater in the final copy. We
1509          can't just offset backwards from the current code point, because we
1510          don't know if there's been an options resetting after the ket. The
1511          correct offset was computed above. */
1512    
1513        if (repeat_max == -1) code[-3] = OP_KETRMAX + repeat_type;        else code[-ketoffset] = OP_KETRMAX + repeat_type;
1514        }        }
1515    
1516      /* Else there's some kind of shambles */      /* Else there's some kind of shambles */
# Line 1121  for (;; ptr++) Line 1523  for (;; ptr++)
1523    
1524      /* In all case we no longer have a previous item. */      /* In all case we no longer have a previous item. */
1525    
1526        END_REPEAT:
1527      previous = NULL;      previous = NULL;
1528      break;      break;
1529    
1530    
1531      /* Start of nested bracket sub-expression, or comment or lookahead.      /* Start of nested bracket sub-expression, or comment or lookahead or
1532      First deal with special things that can come after a bracket; all are      lookbehind or option setting or condition. First deal with special things
1533      introduced by ?, and the appearance of any of them means that this is not a      that can come after a bracket; all are introduced by ?, and the appearance
1534      referencing group. They were checked for validity in the first pass over      of any of them means that this is not a referencing group. They were
1535      the string, so we don't have to check for syntax errors here.  */      checked for validity in the first pass over the string, so we don't have to
1536        check for syntax errors here.  */
1537    
1538      case '(':      case '(':
1539      previous = code;              /* Only real brackets can be repeated */      newoptions = options;
1540        condref = -1;
1541    
1542      if (*(++ptr) == '?')      if (*(++ptr) == '?')
1543        {        {
1544        bravalue = OP_BRA;        int set, unset;
1545          int *optset;
1546    
1547        switch (*(++ptr))        switch (*(++ptr))
1548          {          {
1549          case '#':          case '#':                 /* Comment; skip to ket */
         case 'i':  
         case 'm':  
         case 's':  
         case 'x':  
1550          ptr++;          ptr++;
1551          while (*ptr != ')') ptr++;          while (*ptr != ')') ptr++;
         previous = NULL;  
1552          continue;          continue;
1553    
1554          case ':':                 /* Non-extracting bracket */          case ':':                 /* Non-extracting bracket */
1555            bravalue = OP_BRA;
1556          ptr++;          ptr++;
1557          break;          break;
1558    
1559          case '=':                 /* Assertions can't be repeated */          case '(':
1560            bravalue = OP_COND;       /* Conditional group */
1561            if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0)
1562              {
1563              condref = *ptr - '0';
1564              while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';
1565              ptr++;
1566              }
1567            else ptr--;
1568            break;
1569    
1570            case '=':                 /* Positive lookahead */
1571          bravalue = OP_ASSERT;          bravalue = OP_ASSERT;
1572          ptr++;          ptr++;
         previous = NULL;  
1573          break;          break;
1574    
1575          case '!':          case '!':                 /* Negative lookahead */
1576          bravalue = OP_ASSERT_NOT;          bravalue = OP_ASSERT_NOT;
1577          ptr++;          ptr++;
         previous = NULL;  
1578          break;          break;
1579    
1580          case '>':                         /* "Match once" brackets */          case '<':                 /* Lookbehinds */
1581          if ((options & PCRE_EXTRA) != 0)  /* Not yet standard */          switch (*(++ptr))
1582            {            {
1583            bravalue = OP_ONCE;            case '=':               /* Positive lookbehind */
1584              bravalue = OP_ASSERTBACK;
1585              ptr++;
1586              break;
1587    
1588              case '!':               /* Negative lookbehind */
1589              bravalue = OP_ASSERTBACK_NOT;
1590            ptr++;            ptr++;
           previous = NULL;  
1591            break;            break;
1592    
1593              default:                /* Syntax error */
1594              *errorptr = ERR24;
1595              goto FAILED;
1596            }            }
1597          /* Else fall through */          break;
1598    
1599          default:          case '>':                 /* One-time brackets */
1600          *errorptr = ERR12;          bravalue = OP_ONCE;
1601          goto FAILED;          ptr++;
1602            break;
1603    
1604            case 'R':                 /* Pattern recursion */
1605            *code++ = OP_RECURSE;
1606            ptr++;
1607            continue;
1608    
1609            default:                  /* Option setting */
1610            set = unset = 0;
1611            optset = &set;
1612    
1613            while (*ptr != ')' && *ptr != ':')
1614              {
1615              switch (*ptr++)
1616                {
1617                case '-': optset = &unset; break;
1618    
1619                case 'i': *optset |= PCRE_CASELESS; break;
1620                case 'm': *optset |= PCRE_MULTILINE; break;
1621                case 's': *optset |= PCRE_DOTALL; break;
1622                case 'x': *optset |= PCRE_EXTENDED; break;
1623                case 'U': *optset |= PCRE_UNGREEDY; break;
1624                case 'X': *optset |= PCRE_EXTRA; break;
1625    
1626                default:
1627                *errorptr = ERR12;
1628                goto FAILED;
1629                }
1630              }
1631    
1632            /* Set up the changed option bits, but don't change anything yet. */
1633    
1634            newoptions = (options | set) & (~unset);
1635    
1636            /* If the options ended with ')' this is not the start of a nested
1637            group with option changes, so the options change at this level. At top
1638            level there is nothing else to be done (the options will in fact have
1639            been set from the start of compiling as a result of the first pass) but
1640            at an inner level we must compile code to change the ims options if
1641            necessary, and pass the new setting back so that it can be put at the
1642            start of any following branches, and when this group ends, a resetting
1643            item can be compiled. */
1644    
1645            if (*ptr == ')')
1646              {
1647              if ((options & PCRE_INGROUP) != 0 &&
1648                  (options & PCRE_IMS) != (newoptions & PCRE_IMS))
1649                {
1650                *code++ = OP_OPT;
1651                *code++ = *optchanged = newoptions & PCRE_IMS;
1652                }
1653              options = newoptions;  /* Change options at this level */
1654              previous = NULL;       /* This item can't be repeated */
1655              continue;              /* It is complete */
1656              }
1657    
1658            /* If the options ended with ':' we are heading into a nested group
1659            with possible change of options. Such groups are non-capturing and are
1660            not assertions of any kind. All we need to do is skip over the ':';
1661            the newoptions value is handled below. */
1662    
1663            bravalue = OP_BRA;
1664            ptr++;
1665          }          }
1666        }        }
1667    
1668      /* Else we have a referencing group */      /* Else we have a referencing group; adjust the opcode. */
1669    
1670      else      else
1671        {        {
# Line 1193  for (;; ptr++) Line 1677  for (;; ptr++)
1677        bravalue = OP_BRA + *brackets;        bravalue = OP_BRA + *brackets;
1678        }        }
1679    
1680      /* Process nested bracketed re; at end pointer is on the bracket. We copy      /* Process nested bracketed re. Assertions may not be repeated, but other
1681      code into a non-register variable in order to be able to pass its address      kinds can be. We copy code into a non-register variable in order to be able
1682      because some compilers complain otherwise. */      to pass its address because some compilers complain otherwise. Pass in a
1683        new setting for the ims options if they have changed. */
1684    
1685        previous = (bravalue >= OP_ONCE)? code : NULL;
1686      *code = bravalue;      *code = bravalue;
1687        tempcode = code;
1688    
1689        if (!compile_regex(
1690             options | PCRE_INGROUP,       /* Set for all nested groups */
1691             ((options & PCRE_IMS) != (newoptions & PCRE_IMS))?
1692               newoptions & PCRE_IMS : -1, /* Pass ims options if changed */
1693             brackets,                     /* Bracket level */
1694             &tempcode,                    /* Where to put code (updated) */
1695             &ptr,                         /* Input pointer (updated) */
1696             errorptr,                     /* Where to put an error message */
1697             (bravalue == OP_ASSERTBACK ||
1698              bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
1699             condref,                      /* Condition reference number */
1700             &subreqchar,                  /* For possible last char */
1701             &subcountlits,                /* For literal count */
1702             cd))                          /* Tables block */
1703          goto FAILED;
1704    
1705        /* At the end of compiling, code is still pointing to the start of the
1706        group, while tempcode has been updated to point past the end of the group
1707        and any option resetting that may follow it. The pattern pointer (ptr)
1708        is on the bracket. */
1709    
1710        /* If this is a conditional bracket, check that there are no more than
1711        two branches in the group. */
1712    
1713        if (bravalue == OP_COND)
1714        {        {
1715        uschar *mcode = code;        uschar *tc = code;
1716        if (!compile_regex(options, brackets, &mcode, &ptr, errorptr))        condcount = 0;
1717    
1718          do {
1719             condcount++;
1720             tc += (tc[1] << 8) | tc[2];
1721             }
1722          while (*tc != OP_KET);
1723    
1724          if (condcount > 2)
1725            {
1726            *errorptr = ERR27;
1727          goto FAILED;          goto FAILED;
1728        code = mcode;          }
1729          }
1730    
1731        /* Handle updating of the required character. If the subpattern didn't
1732        set one, leave it as it was. Otherwise, update it for normal brackets of
1733        all kinds, forward assertions, and conditions with two branches. Don't
1734        update the literal count for forward assertions, however. If the bracket
1735        is followed by a quantifier with zero repeat, we have to back off. Hence
1736        the definition of prevreqchar and subcountlits outside the main loop so
1737        that they can be accessed for the back off. */
1738    
1739        if (subreqchar > 0 &&
1740             (bravalue >= OP_BRA || bravalue == OP_ONCE || bravalue == OP_ASSERT ||
1741             (bravalue == OP_COND && condcount == 2)))
1742          {
1743          prevreqchar = *reqchar;
1744          *reqchar = subreqchar;
1745          if (bravalue != OP_ASSERT) *countlits += subcountlits;
1746        }        }
1747    
1748        /* Now update the main code pointer to the end of the group. */
1749    
1750        code = tempcode;
1751    
1752        /* Error if hit end of pattern */
1753    
1754      if (*ptr != ')')      if (*ptr != ')')
1755        {        {
1756        *errorptr = ERR14;        *errorptr = ERR14;
# Line 1217  for (;; ptr++) Line 1763  for (;; ptr++)
1763      for validity in the pre-compiling pass. */      for validity in the pre-compiling pass. */
1764    
1765      case '\\':      case '\\':
1766      oldptr = ptr;      tempptr = ptr;
1767      c = check_escape(&ptr, errorptr, *brackets, options, FALSE);      c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1768    
1769      /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values      /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values
1770      are arranged to be the negation of the corresponding OP_values. For the      are arranged to be the negation of the corresponding OP_values. For the
# Line 1231  for (;; ptr++) Line 1777  for (;; ptr++)
1777        {        {
1778        if (-c >= ESC_REF)        if (-c >= ESC_REF)
1779          {          {
         int refnum = -c - ESC_REF;  
         if (*brackets < refnum)  
           {  
           *errorptr = ERR15;  
           goto FAILED;  
           }  
1780          previous = code;          previous = code;
1781          *code++ = OP_REF;          *code++ = OP_REF;
1782          *code++ = refnum;          *code++ = -c - ESC_REF;
1783          }          }
1784        else        else
1785          {          {
1786          previous = (-c > ESC_b && -c < ESC_X)? code : NULL;          previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
1787          *code++ = -c;          *code++ = -c;
1788          }          }
1789        continue;        continue;
# Line 1251  for (;; ptr++) Line 1791  for (;; ptr++)
1791    
1792      /* Data character: reset and fall through */      /* Data character: reset and fall through */
1793    
1794      ptr = oldptr;      ptr = tempptr;
1795      c = '\\';      c = '\\';
1796    
1797      /* Handle a run of data characters until a metacharacter is encountered.      /* Handle a run of data characters until a metacharacter is encountered.
# Line 1269  for (;; ptr++) Line 1809  for (;; ptr++)
1809        {        {
1810        if ((options & PCRE_EXTENDED) != 0)        if ((options & PCRE_EXTENDED) != 0)
1811          {          {
1812          if ((pcre_ctypes[c] & ctype_space) != 0) continue;          if ((cd->ctypes[c] & ctype_space) != 0) continue;
1813          if (c == '#')          if (c == '#')
1814            {            {
1815            while ((c = *(++ptr)) != 0 && c != '\n');            /* The space before the ; is to avoid a warning on a silly compiler
1816              on the Macintosh. */
1817              while ((c = *(++ptr)) != 0 && c != '\n') ;
1818            if (c == 0) break;            if (c == 0) break;
1819            continue;            continue;
1820            }            }
# Line 1284  for (;; ptr++) Line 1826  for (;; ptr++)
1826    
1827        if (c == '\\')        if (c == '\\')
1828          {          {
1829          oldptr = ptr;          tempptr = ptr;
1830          c = check_escape(&ptr, errorptr, *brackets, options, FALSE);          c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1831          if (c < 0) { ptr = oldptr; break; }          if (c < 0) { ptr = tempptr; break; }
1832          }          }
1833    
1834        /* Ordinary character or single-char escape */        /* Ordinary character or single-char escape */
# Line 1297  for (;; ptr++) Line 1839  for (;; ptr++)
1839    
1840      /* This "while" is the end of the "do" above. */      /* This "while" is the end of the "do" above. */
1841    
1842      while (length < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);      while (length < 255 && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0);
1843    
1844        /* Update the last character and the count of literals */
1845    
1846        prevreqchar = (length > 1)? code[-2] : *reqchar;
1847        *reqchar = code[-1];
1848        *countlits += length;
1849    
1850      /* Compute the length and set it in the data vector, and advance to      /* Compute the length and set it in the data vector, and advance to
1851      the next state. */      the next state. */
# Line 1327  return FALSE; Line 1875  return FALSE;
1875  /* On entry, ptr is pointing past the bracket character, but on return  /* On entry, ptr is pointing past the bracket character, but on return
1876  it points to the closing bracket, or vertical bar, or end of string.  it points to the closing bracket, or vertical bar, or end of string.
1877  The code variable is pointing at the byte into which the BRA operator has been  The code variable is pointing at the byte into which the BRA operator has been
1878  stored.  stored. If the ims options are changed at the start (for a (?ims: group) or
1879    during any branch, we need to insert an OP_OPT item at the start of every
1880    following branch to ensure they get set correctly at run time, and also pass
1881    the new options into every subsequent branch compile.
1882    
1883  Argument:  Argument:
1884    options   the option bits    options     the option bits
1885    brackets  -> int containing the number of extracting brackets used    optchanged  new ims options to set as if (?ims) were at the start, or -1
1886    codeptr   -> the address of the current code pointer                 for no change
1887    ptrptr    -> the address of the current pattern pointer    brackets    -> int containing the number of extracting brackets used
1888    errorptr  -> pointer to error message    codeptr     -> the address of the current code pointer
1889      ptrptr      -> the address of the current pattern pointer
1890      errorptr    -> pointer to error message
1891      lookbehind  TRUE if this is a lookbehind assertion
1892      condref     > 0 for OPT_CREF setting at start of conditional group
1893      reqchar     -> place to put the last required character, or a negative number
1894      countlits   -> place to put the shortest literal count of any branch
1895      cd          points to the data block with tables pointers
1896    
1897  Returns:    TRUE on success  Returns:      TRUE on success
1898  */  */
1899    
1900  static BOOL  static BOOL
1901  compile_regex(int options, int *brackets, uschar **codeptr,  compile_regex(int options, int optchanged, int *brackets, uschar **codeptr,
1902    const uschar **ptrptr, const char **errorptr)    const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref,
1903      int *reqchar, int *countlits, compile_data *cd)
1904  {  {
1905  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
1906  uschar *code = *codeptr;  uschar *code = *codeptr;
1907    uschar *last_branch = code;
1908  uschar *start_bracket = code;  uschar *start_bracket = code;
1909    uschar *reverse_count = NULL;
1910    int oldoptions = options & PCRE_IMS;
1911    int branchreqchar, branchcountlits;
1912    
1913    *reqchar = -1;
1914    *countlits = INT_MAX;
1915    code += 3;
1916    
1917    /* At the start of a reference-based conditional group, insert the reference
1918    number as an OP_CREF item. */
1919    
1920    if (condref > 0)
1921      {
1922      *code++ = OP_CREF;
1923      *code++ = condref;
1924      }
1925    
1926    /* Loop for each alternative branch */
1927    
1928  for (;;)  for (;;)
1929    {    {
1930    int length;    int length;
   uschar *last_branch = code;  
1931    
1932    code += 3;    /* Handle change of options */
1933    if (!compile_branch(options, brackets, &code, &ptr, errorptr))  
1934      if (optchanged >= 0)
1935        {
1936        *code++ = OP_OPT;
1937        *code++ = optchanged;
1938        options = (options & ~PCRE_IMS) | optchanged;
1939        }
1940    
1941      /* Set up dummy OP_REVERSE if lookbehind assertion */
1942    
1943      if (lookbehind)
1944        {
1945        *code++ = OP_REVERSE;
1946        reverse_count = code;
1947        *code++ = 0;
1948        *code++ = 0;
1949        }
1950    
1951      /* Now compile the branch */
1952    
1953      if (!compile_branch(options, brackets, &code, &ptr, errorptr, &optchanged,
1954          &branchreqchar, &branchcountlits, cd))
1955      {      {
1956      *ptrptr = ptr;      *ptrptr = ptr;
1957      return FALSE;      return FALSE;
# Line 1365  for (;;) Line 1963  for (;;)
1963    last_branch[1] = length >> 8;    last_branch[1] = length >> 8;
1964    last_branch[2] = length & 255;    last_branch[2] = length & 255;
1965    
1966      /* Save the last required character if all branches have the same; a current
1967      value of -1 means unset, while -2 means "previous branch had no last required
1968      char".  */
1969    
1970      if (*reqchar != -2)
1971        {
1972        if (branchreqchar >= 0)
1973          {
1974          if (*reqchar == -1) *reqchar = branchreqchar;
1975          else if (*reqchar != branchreqchar) *reqchar = -2;
1976          }
1977        else *reqchar = -2;
1978        }
1979    
1980      /* Keep the shortest literal count */
1981    
1982      if (branchcountlits < *countlits) *countlits = branchcountlits;
1983      DPRINTF(("literal count = %d min=%d\n", branchcountlits, *countlits));
1984    
1985      /* If lookbehind, check that this branch matches a fixed-length string,
1986      and put the length into the OP_REVERSE item. Temporarily mark the end of
1987      the branch with OP_END. */
1988    
1989      if (lookbehind)
1990        {
1991        *code = OP_END;
1992        length = find_fixedlength(last_branch);
1993        DPRINTF(("fixed length = %d\n", length));
1994        if (length < 0)
1995          {
1996          *errorptr = ERR25;
1997          *ptrptr = ptr;
1998          return FALSE;
1999          }
2000        reverse_count[0] = (length >> 8);
2001        reverse_count[1] = length & 255;
2002        }
2003    
2004    /* Reached end of expression, either ')' or end of pattern. Insert a    /* Reached end of expression, either ')' or end of pattern. Insert a
2005    terminating ket and the length of the whole bracketed item, and return,    terminating ket and the length of the whole bracketed item, and return,
2006    leaving the pointer at the terminating char. */    leaving the pointer at the terminating char. If any of the ims options
2007      were changed inside the group, compile a resetting op-code following. */
2008    
2009    if (*ptr != '|')    if (*ptr != '|')
2010      {      {
# Line 1375  for (;;) Line 2012  for (;;)
2012      *code++ = OP_KET;      *code++ = OP_KET;
2013      *code++ = length >> 8;      *code++ = length >> 8;
2014      *code++ = length & 255;      *code++ = length & 255;
2015        if (optchanged >= 0)
2016          {
2017          *code++ = OP_OPT;
2018          *code++ = oldoptions;
2019          }
2020      *codeptr = code;      *codeptr = code;
2021      *ptrptr = ptr;      *ptrptr = ptr;
2022      return TRUE;      return TRUE;
# Line 1383  for (;;) Line 2025  for (;;)
2025    /* Another branch follows; insert an "or" node and advance the pointer. */    /* Another branch follows; insert an "or" node and advance the pointer. */
2026    
2027    *code = OP_ALT;    *code = OP_ALT;
2028      last_branch = code;
2029      code += 3;
2030    ptr++;    ptr++;
2031    }    }
2032  /* Control never reaches here */  /* Control never reaches here */
# Line 1390  for (;;) Line 2034  for (;;)
2034    
2035    
2036    
2037    
2038    /*************************************************
2039    *      Find first significant op code            *
2040    *************************************************/
2041    
2042    /* This is called by several functions that scan a compiled expression looking
2043    for a fixed first character, or an anchoring op code etc. It skips over things
2044    that do not influence this. For one application, a change of caseless option is
2045    important.
2046    
2047    Arguments:
2048      code       pointer to the start of the group
2049      options    pointer to external options
2050      optbit     the option bit whose changing is significant, or
2051                 zero if none are
2052      optstop    TRUE to return on option change, otherwise change the options
2053                   value and continue
2054    
2055    Returns:     pointer to the first significant opcode
2056    */
2057    
2058    static const uschar*
2059    first_significant_code(const uschar *code, int *options, int optbit,
2060      BOOL optstop)
2061    {
2062    for (;;)
2063      {
2064      switch ((int)*code)
2065        {
2066        case OP_OPT:
2067        if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit))
2068          {
2069          if (optstop) return code;
2070          *options = (int)code[1];
2071          }
2072        code += 2;
2073        break;
2074    
2075        case OP_CREF:
2076        code += 2;
2077        break;
2078    
2079        case OP_WORD_BOUNDARY:
2080        case OP_NOT_WORD_BOUNDARY:
2081        code++;
2082        break;
2083    
2084        case OP_ASSERT_NOT:
2085        case OP_ASSERTBACK:
2086        case OP_ASSERTBACK_NOT:
2087        do code += (code[1] << 8) + code[2]; while (*code == OP_ALT);
2088        code += 3;
2089        break;
2090    
2091        default:
2092        return code;
2093        }
2094      }
2095    /* Control never reaches here */
2096    }
2097    
2098    
2099    
2100    
2101  /*************************************************  /*************************************************
2102  *          Check for anchored expression         *  *          Check for anchored expression         *
2103  *************************************************/  *************************************************/
# Line 1400  all of whose alternatives start with OP_ Line 2108  all of whose alternatives start with OP_
2108  it's anchored. However, if this is a multiline pattern, then only OP_SOD  it's anchored. However, if this is a multiline pattern, then only OP_SOD
2109  counts, since OP_CIRC can match in the middle.  counts, since OP_CIRC can match in the middle.
2110    
2111  A branch is also implicitly anchored if it starts with .* because that will try  A branch is also implicitly anchored if it starts with .* and DOTALL is set,
2112  the rest of the pattern at all possible matching points, so there is no point  because that will try the rest of the pattern at all possible matching points,
2113  trying them again.  so there is no point trying them again.
2114    
2115  Argument:  points to start of expression (the bracket)  Arguments:
2116  Returns:   TRUE or FALSE    code       points to start of expression (the bracket)
2117      options    points to the options setting
2118    
2119    Returns:     TRUE or FALSE
2120  */  */
2121    
2122  static BOOL  static BOOL
2123  is_anchored(register const uschar *code, BOOL multiline)  is_anchored(register const uschar *code, int *options)
2124  {  {
2125  do {  do {
2126     int op = (int)code[3];     const uschar *scode = first_significant_code(code + 3, options,
2127     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE)       PCRE_MULTILINE, FALSE);
2128       { if (!is_anchored(code+3, multiline)) return FALSE; }     register int op = *scode;
2129     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
2130       { if (code[4] != OP_ANY) return FALSE; }       { if (!is_anchored(scode, options)) return FALSE; }
2131     else if (op != OP_SOD && (multiline || op != OP_CIRC)) return FALSE;     else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) &&
2132                (*options & PCRE_DOTALL) != 0)
2133         { if (scode[1] != OP_ANY) return FALSE; }
2134       else if (op != OP_SOD &&
2135               ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))
2136         return FALSE;
2137     code += (code[1] << 8) + code[2];     code += (code[1] << 8) + code[2];
2138     }     }
2139  while (*code == OP_ALT);  while (*code == OP_ALT);
# Line 1427  return TRUE; Line 2143  return TRUE;
2143    
2144    
2145  /*************************************************  /*************************************************
2146  *     Check for start with \n line expression    *  *         Check for starting with ^ or .*        *
2147  *************************************************/  *************************************************/
2148    
2149  /* This is called for multiline expressions to try to find out if every branch  /* This is called to find out if every branch starts with ^ or .* so that
2150  starts with ^ so that "first char" processing can be done to speed things up.  "first char" processing can be done to speed things up in multiline
2151    matching and for non-DOTALL patterns that start with .* (which must start at
2152    the beginning or after \n).
2153    
2154  Argument:  points to start of expression (the bracket)  Argument:  points to start of expression (the bracket)
2155  Returns:   TRUE or FALSE  Returns:   TRUE or FALSE
# Line 1441  static BOOL Line 2159  static BOOL
2159  is_startline(const uschar *code)  is_startline(const uschar *code)
2160  {  {
2161  do {  do {
2162     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)     const uschar *scode = first_significant_code(code + 3, NULL, 0, FALSE);
2163       { if (!is_startline(code+3)) return FALSE; }     register int op = *scode;
2164     else if (code[3] != OP_CIRC) return FALSE;     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
2165         { if (!is_startline(scode)) return FALSE; }
2166       else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)
2167         { if (scode[1] != OP_ANY) return FALSE; }
2168       else if (op != OP_CIRC) return FALSE;
2169     code += (code[1] << 8) + code[2];     code += (code[1] << 8) + code[2];
2170     }     }
2171  while (*code == OP_ALT);  while (*code == OP_ALT);
# Line 1462  Consider each alternative branch. If the Line 2184  Consider each alternative branch. If the
2184  a bracket all of whose alternatives start with the same char (recurse ad lib),  a bracket all of whose alternatives start with the same char (recurse ad lib),
2185  then we return that char, otherwise -1.  then we return that char, otherwise -1.
2186    
2187  Argument:  points to start of expression (the bracket)  Arguments:
2188  Returns:   -1 or the fixed first char    code       points to start of expression (the bracket)
2189      options    pointer to the options (used to check casing changes)
2190    
2191    Returns:     -1 or the fixed first char
2192  */  */
2193    
2194  static int  static int
2195  find_firstchar(uschar *code)  find_firstchar(const uschar *code, int *options)
2196  {  {
2197  register int c = -1;  register int c = -1;
2198  do  do {
2199    {     int d;
2200    register int charoffset = 4;     const uschar *scode = first_significant_code(code + 3, options,
2201         PCRE_CASELESS, TRUE);
2202    if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)     register int op = *scode;
2203      {  
2204      register int d;     if (op >= OP_BRA) op = OP_BRA;
2205      if ((d = find_firstchar(code+3)) < 0) return -1;  
2206      if (c < 0) c = d; else if (c != d) return -1;     switch(op)
2207      }       {
2208         default:
2209    else switch(code[3])       return -1;
2210      {  
2211      default:       case OP_BRA:
2212      return -1;       case OP_ASSERT:
2213         case OP_ONCE:
2214      case OP_EXACT:       /* Fall through */       case OP_COND:
2215      charoffset++;       if ((d = find_firstchar(scode, options)) < 0) return -1;
2216         if (c < 0) c = d; else if (c != d) return -1;
2217      case OP_CHARS:       /* Fall through */       break;
2218      charoffset++;  
2219         case OP_EXACT:       /* Fall through */
2220         scode++;
2221    
2222         case OP_CHARS:       /* Fall through */
2223         scode++;
2224    
2225         case OP_PLUS:
2226         case OP_MINPLUS:
2227         if (c < 0) c = scode[1]; else if (c != scode[1]) return -1;
2228         break;
2229         }
2230    
2231      case OP_PLUS:     code += (code[1] << 8) + code[2];
2232      case OP_MINPLUS:     }
     if (c < 0) c = code[charoffset]; else if (c != code[charoffset]) return -1;  
     break;  
     }  
   code += (code[1] << 8) + code[2];  
   }  
2233  while (*code == OP_ALT);  while (*code == OP_ALT);
2234  return c;  return c;
2235  }  }
2236    
2237    
2238    
2239    
2240    
2241  /*************************************************  /*************************************************
2242  *        Compile a Regular Expression            *  *        Compile a Regular Expression            *
2243  *************************************************/  *************************************************/
# Line 1517  Arguments: Line 2250  Arguments:
2250    options      various option bits    options      various option bits
2251    errorptr     pointer to pointer to error text    errorptr     pointer to pointer to error text
2252    erroroffset  ptr offset in pattern where error was detected    erroroffset  ptr offset in pattern where error was detected
2253      tables       pointer to character tables or NULL
2254    
2255  Returns:       pointer to compiled data block, or NULL on error,  Returns:       pointer to compiled data block, or NULL on error,
2256                 with errorptr and erroroffset set                 with errorptr and erroroffset set
# Line 1524  Returns:       pointer to compiled data Line 2258  Returns:       pointer to compiled data
2258    
2259  pcre *  pcre *
2260  pcre_compile(const char *pattern, int options, const char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
2261    int *erroroffset)    int *erroroffset, const unsigned char *tables)
2262  {  {
2263  real_pcre *re;  real_pcre *re;
 int spaces = 0;  
2264  int length = 3;      /* For initial BRA plus length */  int length = 3;      /* For initial BRA plus length */
2265  int runlength;  int runlength;
2266  int c, size;  int c, reqchar, countlits;
2267  int bracount = 0;  int bracount = 0;
 int brastack[200];  
2268  int top_backref = 0;  int top_backref = 0;
2269    int branch_extra = 0;
2270    int branch_newextra;
2271  unsigned int brastackptr = 0;  unsigned int brastackptr = 0;
2272    size_t size;
2273  uschar *code;  uschar *code;
2274  const uschar *ptr;  const uschar *ptr;
2275    compile_data compile_block;
2276    int brastack[BRASTACK_SIZE];
2277    uschar bralenstack[BRASTACK_SIZE];
2278    
2279  #ifdef DEBUG  #ifdef DEBUG
2280  uschar *code_base, *code_end;  uschar *code_base, *code_end;
# Line 1563  if ((options & ~PUBLIC_OPTIONS) != 0) Line 2301  if ((options & ~PUBLIC_OPTIONS) != 0)
2301    return NULL;    return NULL;
2302    }    }
2303    
2304    /* Set up pointers to the individual character tables */
2305    
2306    if (tables == NULL) tables = pcre_default_tables;
2307    compile_block.lcc = tables + lcc_offset;
2308    compile_block.fcc = tables + fcc_offset;
2309    compile_block.cbits = tables + cbits_offset;
2310    compile_block.ctypes = tables + ctypes_offset;
2311    
2312    /* Reflect pattern for debugging output */
2313    
2314  DPRINTF(("------------------------------------------------------------------\n"));  DPRINTF(("------------------------------------------------------------------\n"));
2315  DPRINTF(("%s\n", pattern));  DPRINTF(("%s\n", pattern));
2316    
# Line 1579  while ((c = *(++ptr)) != 0) Line 2327  while ((c = *(++ptr)) != 0)
2327    int min, max;    int min, max;
2328    int class_charcount;    int class_charcount;
2329    
2330    if ((pcre_ctypes[c] & ctype_space) != 0)    if ((options & PCRE_EXTENDED) != 0)
     {  
     if ((options & PCRE_EXTENDED) != 0) continue;  
     spaces++;  
     }  
   
   if (c == '#' && (options & PCRE_EXTENDED) != 0)  
2331      {      {
2332      while ((c = *(++ptr)) != 0 && c != '\n');      if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2333      continue;      if (c == '#')
2334          {
2335          /* The space before the ; is to avoid a warning on a silly compiler
2336          on the Macintosh. */
2337          while ((c = *(++ptr)) != 0 && c != '\n') ;
2338          continue;
2339          }
2340      }      }
2341    
2342    switch(c)    switch(c)
# Line 1601  while ((c = *(++ptr)) != 0) Line 2349  while ((c = *(++ptr)) != 0)
2349      case '\\':      case '\\':
2350        {        {
2351        const uschar *save_ptr = ptr;        const uschar *save_ptr = ptr;
2352        c = check_escape(&ptr, errorptr, bracount, options, FALSE);        c = check_escape(&ptr, errorptr, bracount, options, FALSE, &compile_block);
2353        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2354        if (c >= 0)        if (c >= 0)
2355          {          {
# Line 1621  while ((c = *(++ptr)) != 0) Line 2369  while ((c = *(++ptr)) != 0)
2369        int refnum = -c - ESC_REF;        int refnum = -c - ESC_REF;
2370        if (refnum > top_backref) top_backref = refnum;        if (refnum > top_backref) top_backref = refnum;
2371        length++;   /* For single back reference */        length++;   /* For single back reference */
2372        if (ptr[1] == '{' && is_counted_repeat(ptr+2))        if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2373          {          {
2374          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
2375          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2376          if ((min == 0 && (max == 1 || max == -1)) ||          if ((min == 0 && (max == 1 || max == -1)) ||
2377            (min == 1 && max == -1))            (min == 1 && max == -1))
# Line 1647  while ((c = *(++ptr)) != 0) Line 2395  while ((c = *(++ptr)) != 0)
2395      or back reference. */      or back reference. */
2396    
2397      case '{':      case '{':
2398      if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;      if (!is_counted_repeat(ptr+1, &compile_block)) goto NORMAL_CHAR;
2399      ptr = read_repeat_counts(ptr+1, &min, &max, errorptr);      ptr = read_repeat_counts(ptr+1, &min, &max, errorptr, &compile_block);
2400      if (*errorptr != NULL) goto PCRE_ERROR_RETURN;      if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2401      if ((min == 0 && (max == 1 || max == -1)) ||      if ((min == 0 && (max == 1 || max == -1)) ||
2402        (min == 1 && max == -1))        (min == 1 && max == -1))
# Line 1662  while ((c = *(++ptr)) != 0) Line 2410  while ((c = *(++ptr)) != 0)
2410      if (ptr[1] == '?') ptr++;      if (ptr[1] == '?') ptr++;
2411      continue;      continue;
2412    
2413      /* An alternation contains an offset to the next branch or ket. */      /* An alternation contains an offset to the next branch or ket. If any ims
2414        options changed in the previous branch(es), and/or if we are in a
2415        lookbehind assertion, extra space will be needed at the start of the
2416        branch. This is handled by branch_extra. */
2417    
2418      case '|':      case '|':
2419      length += 3;      length += 3 + branch_extra;
2420      continue;      continue;
2421    
2422      /* A character class uses 33 characters. Don't worry about character types      /* A character class uses 33 characters. Don't worry about character types
# Line 1679  while ((c = *(++ptr)) != 0) Line 2431  while ((c = *(++ptr)) != 0)
2431        {        {
2432        if (*ptr == '\\')        if (*ptr == '\\')
2433          {          {
2434          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE);          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE,
2435              &compile_block);
2436          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2437          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
2438          }          }
# Line 1696  while ((c = *(++ptr)) != 0) Line 2449  while ((c = *(++ptr)) != 0)
2449    
2450        /* A repeat needs either 1 or 5 bytes. */        /* A repeat needs either 1 or 5 bytes. */
2451    
2452        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2453          {          {
2454          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
2455          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2456          if ((min == 0 && (max == 1 || max == -1)) ||          if ((min == 0 && (max == 1 || max == -1)) ||
2457            (min == 1 && max == -1))            (min == 1 && max == -1))
# Line 1712  while ((c = *(++ptr)) != 0) Line 2465  while ((c = *(++ptr)) != 0)
2465      /* Brackets may be genuine groups or special things */      /* Brackets may be genuine groups or special things */
2466    
2467      case '(':      case '(':
2468        branch_newextra = 0;
2469    
2470      /* Handle special forms of bracket, which all start (? */      /* Handle special forms of bracket, which all start (? */
2471    
2472      if (ptr[1] == '?') switch (c = ptr[2])      if (ptr[1] == '?')
2473        {        {
2474        /* Skip over comments entirely */        int set, unset;
2475        case '#':        int *optset;
       ptr += 3;  
       while (*ptr != 0 && *ptr != ')') ptr++;  
       if (*ptr == 0)  
         {  
         *errorptr = ERR18;  
         goto PCRE_ERROR_RETURN;  
         }  
       continue;  
2476    
2477        /* Non-referencing groups and lookaheads just move the pointer on, and        switch (c = ptr[2])
2478        then behave like a non-special bracket, except that they don't increment          {
2479        the count of extracting brackets. */          /* Skip over comments entirely */
2480            case '#':
2481        case ':':          ptr += 3;
2482        case '=':          while (*ptr != 0 && *ptr != ')') ptr++;
2483        case '!':          if (*ptr == 0)
2484        ptr += 2;            {
2485        break;            *errorptr = ERR18;
2486              goto PCRE_ERROR_RETURN;
2487              }
2488            continue;
2489    
2490        /* Ditto for the "once only" bracket, allowed only if the extra bit          /* Non-referencing groups and lookaheads just move the pointer on, and
2491        is set. */          then behave like a non-special bracket, except that they don't increment
2492            the count of extracting brackets. Ditto for the "once only" bracket,
2493            which is in Perl from version 5.005. */
2494    
2495        case '>':          case ':':
2496        if ((options & PCRE_EXTRA) != 0)          case '=':
2497          {          case '!':
2498            case '>':
2499          ptr += 2;          ptr += 2;
2500          break;          break;
         }  
       /* Else fall thourh */  
2501    
2502        /* Else loop setting valid options until ) is met. Anything else is an          /* A recursive call to the regex is an extension, to provide the
2503        error. */          facility which can be obtained by $(?p{perl-code}) in Perl 5.6. */
2504    
2505        default:          case 'R':
2506        ptr += 2;          if (ptr[3] != ')')
       for (;; ptr++)  
         {  
         if ((c = *ptr) == 'i')  
2507            {            {
2508            options |= PCRE_CASELESS;            *errorptr = ERR29;
2509            continue;            goto PCRE_ERROR_RETURN;
2510            }            }
2511          else if ((c = *ptr) == 'm')          ptr += 3;
2512            length += 1;
2513            break;
2514    
2515            /* Lookbehinds are in Perl from version 5.005 */
2516    
2517            case '<':
2518            if (ptr[3] == '=' || ptr[3] == '!')
2519            {            {
2520            options |= PCRE_MULTILINE;            ptr += 3;
2521            continue;            branch_newextra = 3;
2522              length += 3;         /* For the first branch */
2523              break;
2524            }            }
2525          else if (c == 's')          *errorptr = ERR24;
2526            goto PCRE_ERROR_RETURN;
2527    
2528            /* Conditionals are in Perl from version 5.005. The bracket must either
2529            be followed by a number (for bracket reference) or by an assertion
2530            group. */
2531    
2532            case '(':
2533            if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0)
2534              {
2535              ptr += 4;
2536              length += 2;
2537              while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++;
2538              if (*ptr != ')')
2539                {
2540                *errorptr = ERR26;
2541                goto PCRE_ERROR_RETURN;
2542                }
2543              }
2544            else   /* An assertion must follow */
2545            {            {
2546            options |= PCRE_DOTALL;            ptr++;   /* Can treat like ':' as far as spacing is concerned */
2547            continue;            if (ptr[2] != '?' ||
2548                 (ptr[3] != '=' && ptr[3] != '!' && ptr[3] != '<') )
2549                {
2550                ptr += 2;    /* To get right offset in message */
2551                *errorptr = ERR28;
2552                goto PCRE_ERROR_RETURN;
2553                }
2554              }
2555            break;
2556    
2557            /* Else loop checking valid options until ) is met. Anything else is an
2558            error. If we are without any brackets, i.e. at top level, the settings
2559            act as if specified in the options, so massage the options immediately.
2560            This is for backward compatibility with Perl 5.004. */
2561    
2562            default:
2563            set = unset = 0;
2564            optset = &set;
2565            ptr += 2;
2566    
2567            for (;; ptr++)
2568              {
2569              c = *ptr;
2570              switch (c)
2571                {
2572                case 'i':
2573                *optset |= PCRE_CASELESS;
2574                continue;
2575    
2576                case 'm':
2577                *optset |= PCRE_MULTILINE;
2578                continue;
2579    
2580                case 's':
2581                *optset |= PCRE_DOTALL;
2582                continue;
2583    
2584                case 'x':
2585                *optset |= PCRE_EXTENDED;
2586                continue;
2587    
2588                case 'X':
2589                *optset |= PCRE_EXTRA;
2590                continue;
2591    
2592                case 'U':
2593                *optset |= PCRE_UNGREEDY;
2594                continue;
2595    
2596                case '-':
2597                optset = &unset;
2598                continue;
2599    
2600                /* A termination by ')' indicates an options-setting-only item;
2601                this is global at top level; otherwise nothing is done here and
2602                it is handled during the compiling process on a per-bracket-group
2603                basis. */
2604    
2605                case ')':
2606                if (brastackptr == 0)
2607                  {
2608                  options = (options | set) & (~unset);
2609                  set = unset = 0;     /* To save length */
2610                  }
2611                /* Fall through */
2612    
2613                /* A termination by ':' indicates the start of a nested group with
2614                the given options set. This is again handled at compile time, but
2615                we must allow for compiled space if any of the ims options are
2616                set. We also have to allow for resetting space at the end of
2617                the group, which is why 4 is added to the length and not just 2.
2618                If there are several changes of options within the same group, this
2619                will lead to an over-estimate on the length, but this shouldn't
2620                matter very much. We also have to allow for resetting options at
2621                the start of any alternations, which we do by setting
2622                branch_newextra to 2. Finally, we record whether the case-dependent
2623                flag ever changes within the regex. This is used by the "required
2624                character" code. */
2625    
2626                case ':':
2627                if (((set|unset) & PCRE_IMS) != 0)
2628                  {
2629                  length += 4;
2630                  branch_newextra = 2;
2631                  if (((set|unset) & PCRE_CASELESS) != 0) options |= PCRE_ICHANGED;
2632                  }
2633                goto END_OPTIONS;
2634    
2635                /* Unrecognized option character */
2636    
2637                default:
2638                *errorptr = ERR12;
2639                goto PCRE_ERROR_RETURN;
2640                }
2641            }            }
2642          else if (c == 'x')  
2643            /* If we hit a closing bracket, that's it - this is a freestanding
2644            option-setting. We need to ensure that branch_extra is updated if
2645            necessary. The only values branch_newextra can have here are 0 or 2.
2646            If the value is 2, then branch_extra must either be 2 or 5, depending
2647            on whether this is a lookbehind group or not. */
2648    
2649            END_OPTIONS:
2650            if (c == ')')
2651            {            {
2652            options |= PCRE_EXTENDED;            if (branch_newextra == 2 && (branch_extra == 0 || branch_extra == 3))
2653            length -= spaces;          /* Already counted spaces */              branch_extra += branch_newextra;
2654            continue;            continue;
2655            }            }
         else if (c == ')') break;  
2656    
2657          *errorptr = ERR12;          /* If options were terminated by ':' control comes here. Fall through
2658          goto PCRE_ERROR_RETURN;          to handle the group below. */
2659          }          }
       continue;                      /* End of this bracket handling */  
2660        }        }
2661    
2662      /* Extracting brackets must be counted so we can process escapes in a      /* Extracting brackets must be counted so we can process escapes in a
# Line 1791  while ((c = *(++ptr)) != 0) Line 2665  while ((c = *(++ptr)) != 0)
2665      else bracount++;      else bracount++;
2666    
2667      /* Non-special forms of bracket. Save length for computing whole length      /* Non-special forms of bracket. Save length for computing whole length
2668      at end if there's a repeat that requires duplication of the group. */      at end if there's a repeat that requires duplication of the group. Also
2669        save the current value of branch_extra, and start the new group with
2670        the new value. If non-zero, this will either be 2 for a (?imsx: group, or 3
2671        for a lookbehind assertion. */
2672    
2673      if (brastackptr >= sizeof(brastack)/sizeof(int))      if (brastackptr >= sizeof(brastack)/sizeof(int))
2674        {        {
# Line 1799  while ((c = *(++ptr)) != 0) Line 2676  while ((c = *(++ptr)) != 0)
2676        goto PCRE_ERROR_RETURN;        goto PCRE_ERROR_RETURN;
2677        }        }
2678    
2679        bralenstack[brastackptr] = branch_extra;
2680        branch_extra = branch_newextra;
2681    
2682      brastack[brastackptr++] = length;      brastack[brastackptr++] = length;
2683      length += 3;      length += 3;
2684      continue;      continue;
# Line 1806  while ((c = *(++ptr)) != 0) Line 2686  while ((c = *(++ptr)) != 0)
2686      /* Handle ket. Look for subsequent max/min; for certain sets of values we      /* Handle ket. Look for subsequent max/min; for certain sets of values we
2687      have to replicate this bracket up to that many times. If brastackptr is      have to replicate this bracket up to that many times. If brastackptr is
2688      0 this is an unmatched bracket which will generate an error, but take care      0 this is an unmatched bracket which will generate an error, but take care
2689      not to try to access brastack[-1]. */      not to try to access brastack[-1] when computing the length and restoring
2690        the branch_extra value. */
2691    
2692      case ')':      case ')':
2693      length += 3;      length += 3;
2694        {        {
2695        int minval = 1;        int minval = 1;
2696        int maxval = 1;        int maxval = 1;
2697        int duplength = (brastackptr > 0)? length - brastack[--brastackptr] : 0;        int duplength;
2698    
2699          if (brastackptr > 0)
2700            {
2701            duplength = length - brastack[--brastackptr];
2702            branch_extra = bralenstack[brastackptr];
2703            }
2704          else duplength = 0;
2705    
2706        /* Leave ptr at the final char; for read_repeat_counts this happens        /* Leave ptr at the final char; for read_repeat_counts this happens
2707        automatically; for the others we need an increment. */        automatically; for the others we need an increment. */
2708    
2709        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2, &compile_block))
2710          {          {
2711          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr);          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr,
2712              &compile_block);
2713          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2714          }          }
2715        else if (c == '*') { minval = 0; maxval = -1; ptr++; }        else if (c == '*') { minval = 0; maxval = -1; ptr++; }
2716        else if (c == '+') { maxval = -1; ptr++; }        else if (c == '+') { maxval = -1; ptr++; }
2717        else if (c == '?') { minval = 0; ptr++; }        else if (c == '?') { minval = 0; ptr++; }
2718    
2719        /* If there is a minimum > 1 we have to replicate up to minval-1 times;        /* If the minimum is zero, we have to allow for an OP_BRAZERO before the
2720        if there is a limited maximum we have to replicate up to maxval-1 times        group, and if the maximum is greater than zero, we have to replicate
2721        and allow for a BRAZERO item before each optional copy, as we also have        maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting
2722        to do before the first copy if the minimum is zero. */        bracket set - hence the 7. */
2723    
2724        if (minval == 0) length++;        if (minval == 0)
2725          else if (minval > 1) length += (minval - 1) * duplength;          {
2726        if (maxval > minval) length += (maxval - minval) * (duplength + 1);          length++;
2727            if (maxval > 0) length += (maxval - 1) * (duplength + 7);
2728            }
2729    
2730          /* When the minimum is greater than zero, we have to replicate up to
2731          minval-1 times, with no additions required in the copies. Then, if
2732          there is a limited maximum we have to replicate up to maxval-1 times
2733          allowing for a BRAZERO item before each optional copy and nesting
2734          brackets for all but one of the optional copies. */
2735    
2736          else
2737            {
2738            length += (minval - 1) * duplength;
2739            if (maxval > minval)   /* Need this test as maxval=-1 means no limit */
2740              length += (maxval - minval) * (duplength + 7) - 6;
2741            }
2742        }        }
2743      continue;      continue;
2744    
# Line 1849  while ((c = *(++ptr)) != 0) Line 2753  while ((c = *(++ptr)) != 0)
2753      runlength = 0;      runlength = 0;
2754      do      do
2755        {        {
2756        if ((pcre_ctypes[c] & ctype_space) != 0)        if ((options & PCRE_EXTENDED) != 0)
         {  
         if ((options & PCRE_EXTENDED) != 0) continue;  
         spaces++;  
         }  
   
       if (c == '#' && (options & PCRE_EXTENDED) != 0)  
2757          {          {
2758          while ((c = *(++ptr)) != 0 && c != '\n');          if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2759          continue;          if (c == '#')
2760              {
2761              /* The space before the ; is to avoid a warning on a silly compiler
2762              on the Macintosh. */
2763              while ((c = *(++ptr)) != 0 && c != '\n') ;
2764              continue;
2765              }
2766          }          }
2767    
2768        /* Backslash may introduce a data char or a metacharacter; stop the        /* Backslash may introduce a data char or a metacharacter; stop the
# Line 1867  while ((c = *(++ptr)) != 0) Line 2771  while ((c = *(++ptr)) != 0)
2771        if (c == '\\')        if (c == '\\')
2772          {          {
2773          const uschar *saveptr = ptr;          const uschar *saveptr = ptr;
2774          c = check_escape(&ptr, errorptr, bracount, options, FALSE);          c = check_escape(&ptr, errorptr, bracount, options, FALSE,
2775              &compile_block);
2776          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2777          if (c < 0) { ptr = saveptr; break; }          if (c < 0) { ptr = saveptr; break; }
2778          }          }
# Line 1879  while ((c = *(++ptr)) != 0) Line 2784  while ((c = *(++ptr)) != 0)
2784    
2785      /* This "while" is the end of the "do" above. */      /* This "while" is the end of the "do" above. */
2786    
2787      while (runlength < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);      while (runlength < 255 &&
2788          (compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0);
2789    
2790      ptr--;      ptr--;
2791      length += runlength;      length += runlength;
# Line 1910  if (re == NULL) Line 2816  if (re == NULL)
2816    return NULL;    return NULL;
2817    }    }
2818    
2819  /* Put in the magic number and the options. */  /* Put in the magic number, and save the size, options, and table pointer */
2820    
2821  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
2822    re->size = size;
2823  re->options = options;  re->options = options;
2824    re->tables = tables;
2825    
2826  /* Set up a starting, non-extracting bracket, then compile the expression. On  /* Set up a starting, non-extracting bracket, then compile the expression. On
2827  error, *errorptr will be set non-NULL, so we don't need to look at the result  error, *errorptr will be set non-NULL, so we don't need to look at the result
# Line 1923  ptr = (const uschar *)pattern; Line 2831  ptr = (const uschar *)pattern;
2831  code = re->code;  code = re->code;
2832  *code = OP_BRA;  *code = OP_BRA;
2833  bracount = 0;  bracount = 0;
2834  (void)compile_regex(options, &bracount, &code, &ptr, errorptr);  (void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1,
2835      &reqchar, &countlits, &compile_block);
2836  re->top_bracket = bracount;  re->top_bracket = bracount;
2837  re->top_backref = top_backref;  re->top_backref = top_backref;
2838    
# Line 1940  if debugging, leave the test till after Line 2849  if debugging, leave the test till after
2849  if (code - re->code > length) *errorptr = ERR23;  if (code - re->code > length) *errorptr = ERR23;
2850  #endif  #endif
2851    
2852    /* Give an error if there's a back reference to a non-existent capturing
2853    subpattern. */
2854    
2855    if (top_backref > re->top_bracket) *errorptr = ERR15;
2856    
2857  /* Failed to compile */  /* Failed to compile */
2858    
2859  if (*errorptr != NULL)  if (*errorptr != NULL)
# Line 1950  if (*errorptr != NULL) Line 2864  if (*errorptr != NULL)
2864    return NULL;    return NULL;
2865    }    }
2866    
2867  /* If the anchored option was not passed, set flag if we can determine that it  /* If the anchored option was not passed, set flag if we can determine that the
2868  is anchored by virtue of ^ characters or \A or anything else. Otherwise, see if  pattern is anchored by virtue of ^ characters or \A or anything else (such as
2869  we can determine what the first character has to be, because that speeds up  starting with .* when DOTALL is set).
2870  unanchored matches no end. In the case of multiline matches, an alternative is  
2871  to set the PCRE_STARTLINE flag if all branches start with ^. */  Otherwise, see if we can determine what the first character has to be, because
2872    that speeds up unanchored matches no end. If not, see if we can set the
2873    PCRE_STARTLINE flag. This is helpful for multiline matches when all branches
2874    start with ^, and also when all branches start with .* for non-DOTALL matches.
2875    */
2876    
2877  if ((options & PCRE_ANCHORED) == 0)  if ((options & PCRE_ANCHORED) == 0)
2878    {    {
2879    if (is_anchored(re->code, (options & PCRE_MULTILINE) != 0))    int temp_options = options;
2880      if (is_anchored(re->code, &temp_options))
2881      re->options |= PCRE_ANCHORED;      re->options |= PCRE_ANCHORED;
2882    else    else
2883      {      {
2884      int ch = find_firstchar(re->code);      int ch = find_firstchar(re->code, &temp_options);
2885      if (ch >= 0)      if (ch >= 0)
2886        {        {
2887        re->first_char = ch;        re->first_char = ch;
# Line 1973  if ((options & PCRE_ANCHORED) == 0) Line 2892  if ((options & PCRE_ANCHORED) == 0)
2892      }      }
2893    }    }
2894    
2895    /* Save the last required character if there are at least two literal
2896    characters on all paths, or if there is no first character setting. */
2897    
2898    if (reqchar >= 0 && (countlits > 1 || (re->options & PCRE_FIRSTSET) == 0))
2899      {
2900      re->req_char = reqchar;
2901      re->options |= PCRE_REQCHSET;
2902      }
2903    
2904  /* Print out the compiled data for debugging */  /* Print out the compiled data for debugging */
2905    
2906  #ifdef DEBUG  #ifdef DEBUG
2907    
2908  printf("Length = %d top_bracket = %d top_backref=%d\n",  printf("Length = %d top_bracket = %d top_backref = %d\n",
2909    length, re->top_bracket, re->top_backref);    length, re->top_bracket, re->top_backref);
2910    
2911  if (re->options != 0)  if (re->options != 0)
2912    {    {
2913    printf("%s%s%s%s%s%s%s\n",    printf("%s%s%s%s%s%s%s%s%s\n",
2914      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
2915      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
2916        ((re->options & PCRE_ICHANGED) != 0)? "case state changed " : "",
2917      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
2918      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
2919      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
2920      ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",      ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
2921      ((re->options & PCRE_EXTRA) != 0)? "extra " : "");      ((re->options & PCRE_EXTRA) != 0)? "extra " : "",
2922        ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");
2923    }    }
2924    
2925  if ((re->options & PCRE_FIRSTSET) != 0)  if ((re->options & PCRE_FIRSTSET) != 0)
# Line 1998  if ((re->options & PCRE_FIRSTSET) != 0) Line 2928  if ((re->options & PCRE_FIRSTSET) != 0)
2928      else printf("First char = \\x%02x\n", re->first_char);      else printf("First char = \\x%02x\n", re->first_char);
2929    }    }
2930    
2931    if ((re->options & PCRE_REQCHSET) != 0)
2932      {
2933      if (isprint(re->req_char)) printf("Req char = %c\n", re->req_char);
2934        else printf("Req char = \\x%02x\n", re->req_char);
2935      }
2936    
2937  code_end = code;  code_end = code;
2938  code_base = code = re->code;  code_base = code = re->code;
2939    
# Line 2015  while (code < code_end) Line 2951  while (code < code_end)
2951    
2952    else switch(*code)    else switch(*code)
2953      {      {
2954        case OP_OPT:
2955        printf(" %.2x %s", code[1], OP_names[*code]);
2956        code++;
2957        break;
2958    
2959        case OP_COND:
2960        printf("%3d Cond", (code[1] << 8) + code[2]);
2961        code += 2;
2962        break;
2963    
2964        case OP_CREF:
2965        printf(" %.2d %s", code[1], OP_names[*code]);
2966        code++;
2967        break;
2968    
2969      case OP_CHARS:      case OP_CHARS:
2970      charlength = *(++code);      charlength = *(++code);
2971      printf("%3d ", charlength);      printf("%3d ", charlength);
# Line 2028  while (code < code_end) Line 2979  while (code < code_end)
2979      case OP_KET:      case OP_KET:
2980      case OP_ASSERT:      case OP_ASSERT:
2981      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
2982        case OP_ASSERTBACK:
2983        case OP_ASSERTBACK_NOT:
2984      case OP_ONCE:      case OP_ONCE:
2985      printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);      printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
2986      code += 2;      code += 2;
2987      break;      break;
2988    
2989        case OP_REVERSE:
2990        printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
2991        code += 2;
2992        break;
2993    
2994      case OP_STAR:      case OP_STAR:
2995      case OP_MINSTAR:      case OP_MINSTAR:
2996      case OP_PLUS:      case OP_PLUS:
# Line 2106  while (code < code_end) Line 3064  while (code < code_end)
3064      goto CLASS_REF_REPEAT;      goto CLASS_REF_REPEAT;
3065    
3066      case OP_CLASS:      case OP_CLASS:
     case OP_NEGCLASS:  
3067        {        {
3068        int i, min, max;        int i, min, max;
3069          code++;
3070        if (*code++ == OP_CLASS) printf("    [");        printf("    [");
         else printf("   ^[");  
3071    
3072        for (i = 0; i < 256; i++)        for (i = 0; i < 256; i++)
3073          {          {
# Line 2171  while (code < code_end) Line 3127  while (code < code_end)
3127      }      }
3128    
3129    code++;    code++;
3130    printf("\n");    printf("\n");
3131    }    }
3132  printf("------------------------------------------------------------------\n");  printf("------------------------------------------------------------------\n");
   
 /* This check is done here in the debugging case so that the code that  
 was compiled can be seen. */  
   
 if (code - re->code > length)  
   {  
   *errorptr = ERR23;  
   (pcre_free)(re);  
   *erroroffset = ptr - (uschar *)pattern;  
   return NULL;  
   }  
 #endif  
   
 return (pcre *)re;  
 }  
   
   
   
 /*************************************************  
 *        Match a character type                  *  
 *************************************************/  
   
 /* Not used in all the places it might be as it's sometimes faster  
 to put the code inline.  
   
 Arguments:  
   type        the character type  
   c           the character  
   dotall      the dotall flag  
   
 Returns:      TRUE if character is of the type  
 */  
   
 static BOOL  
 match_type(int type, int c, BOOL dotall)  
 {  
3133    
3134  #ifdef DEBUG  /* This check is done here in the debugging case so that the code that
3135  if (isprint(c)) printf("matching subject %c against ", c);  was compiled can be seen. */
   else printf("matching subject \\x%02x against ", c);  
 printf("%s\n", OP_names[type]);  
 #endif  
3136    
3137  switch(type)  if (code - re->code > length)
3138    {    {
3139    case OP_ANY:            return dotall || c != '\n';    *errorptr = ERR23;
3140    case OP_NOT_DIGIT:      return (pcre_ctypes[c] & ctype_digit) == 0;    (pcre_free)(re);
3141    case OP_DIGIT:          return (pcre_ctypes[c] & ctype_digit) != 0;    *erroroffset = ptr - (uschar *)pattern;
3142    case OP_NOT_WHITESPACE: return (pcre_ctypes[c] & ctype_space) == 0;    return NULL;
   case OP_WHITESPACE:     return (pcre_ctypes[c] & ctype_space) != 0;  
   case OP_NOT_WORDCHAR:   return (pcre_ctypes[c] & ctype_word) == 0;  
   case OP_WORDCHAR:       return (pcre_ctypes[c] & ctype_word) != 0;  
3143    }    }
3144  return FALSE;  #endif
3145    
3146    return (pcre *)re;
3147  }  }
3148    
3149    
# Line 2236  return FALSE; Line 3152  return FALSE;
3152  *          Match a back-reference                *  *          Match a back-reference                *
3153  *************************************************/  *************************************************/
3154    
3155  /* If a back reference hasn't been set, the match fails.  /* If a back reference hasn't been set, the length that is passed is greater
3156    than the number of characters left in the string, so the match fails.
3157    
3158  Arguments:  Arguments:
3159    number      reference number    offset      index into the offset vector
3160    eptr        points into the subject    eptr        points into the subject
3161    length      length to be matched    length      length to be matched
3162    md          points to match data block    md          points to match data block
3163      ims         the ims flags
3164    
3165  Returns:      TRUE if matched  Returns:      TRUE if matched
3166  */  */
3167    
3168  static BOOL  static BOOL
3169  match_ref(int number, register const uschar *eptr, int length, match_data *md)  match_ref(int offset, register const uschar *eptr, int length, match_data *md,
3170      unsigned long int ims)
3171  {  {
3172  const uschar *p = md->start_subject + md->offset_vector[number];  const uschar *p = md->start_subject + md->offset_vector[offset];
3173    
3174  #ifdef DEBUG  #ifdef DEBUG
3175  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 2267  printf("\n"); Line 3186  printf("\n");
3186    
3187  /* Always fail if not enough characters left */  /* Always fail if not enough characters left */
3188    
3189  if (length > md->end_subject - p) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
3190    
3191  /* Separate the caseless case for speed */  /* Separate the caseless case for speed */
3192    
3193  if (md->caseless)  if ((ims & PCRE_CASELESS) != 0)
3194    { while (length-- > 0) if (pcre_lcc[*p++] != pcre_lcc[*eptr++]) return FALSE; }    {
3195      while (length-- > 0)
3196        if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
3197      }
3198  else  else
3199    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
3200    
# Line 2285  return TRUE; Line 3207  return TRUE;
3207  *         Match from current position            *  *         Match from current position            *
3208  *************************************************/  *************************************************/
3209    
3210  /* On entry ecode points to the first opcode, and eptr to the first character.  /* On entry ecode points to the first opcode, and eptr to the first character
3211    in the subject string, while eptrb holds the value of eptr at the start of the
3212    last bracketed group - used for breaking infinite loops matching zero-length
3213    strings.
3214    
3215  Arguments:  Arguments:
3216     eptr        pointer in subject     eptr        pointer in subject
3217     ecode       position in code     ecode       position in code
3218     offset_top  current top pointer     offset_top  current top pointer
3219     md          pointer to "static" info for the match     md          pointer to "static" info for the match
3220       ims         current /i, /m, and /s options
3221       eptrb       pointer to chain of blocks containing eptr at start of
3222                     brackets - for testing for empty matches
3223       flags       can contain
3224                     match_condassert - this is an assertion condition
3225                     match_isgroup - this is the start of a bracketed group
3226    
3227  Returns:       TRUE if matched  Returns:       TRUE if matched
3228  */  */
3229    
3230  static BOOL  static BOOL
3231  match(register const uschar *eptr, register const uschar *ecode, int offset_top,  match(register const uschar *eptr, register const uschar *ecode,
3232    match_data *md)    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
3233      int flags)
3234  {  {
3235    unsigned long int original_ims = ims;   /* Save for resetting on ')' */
3236    eptrblock newptrb;
3237    
3238    /* At the start of a bracketed group, add the current subject pointer to the
3239    stack of such pointers, to be re-instated at the end of the group when we hit
3240    the closing ket. When match() is called in other circumstances, we don't add to
3241    the stack. */
3242    
3243    if ((flags & match_isgroup) != 0)
3244      {
3245      newptrb.prev = eptrb;
3246      newptrb.saved_eptr = eptr;
3247      eptrb = &newptrb;
3248      }
3249    
3250    /* Now start processing the operations. */
3251    
3252  for (;;)  for (;;)
3253    {    {
3254      int op = (int)*ecode;
3255    int min, max, ctype;    int min, max, ctype;
3256    register int i;    register int i;
3257    register int c;    register int c;
3258    BOOL minimize = FALSE;    BOOL minimize = FALSE;
3259    
3260    /* Opening bracket. Check the alternative branches in turn, failing if none    /* Opening capturing bracket. If there is space in the offset vector, save
3261    match. We have to set the start offset if required and there is space    the current subject position in the working slot at the top of the vector. We
3262    in the offset vector so that it is available for subsequent back references    mustn't change the current values of the data slot, because they may be set
3263    if the bracket matches. However, if the bracket fails, we must put back the    from a previous iteration of this group, and be referred to by a reference
3264    previous value of both offsets in case they were set by a previous copy of    inside the group.
3265    the same bracket. Don't worry about setting the flag for the error case here;  
3266    that is handled in the code for KET. */    If the bracket fails to match, we need to restore this value and also the
3267      values of the final offsets, in case they were set by a previous iteration of
3268      the same bracket.
3269    
3270      If there isn't enough space in the offset vector, treat this as if it were a
3271      non-capturing bracket. Don't worry about setting the flag for the error case
3272      here; that is handled in the code for KET. */
3273    
3274    if ((int)*ecode >= OP_BRA)    if (op > OP_BRA)
3275      {      {
3276      int number = (*ecode - OP_BRA) << 1;      int number = op - OP_BRA;
3277      int save_offset1 = 0, save_offset2 = 0;      int offset = number << 1;
3278    
3279      DPRINTF(("start bracket %d\n", number/2));  #ifdef DEBUG
3280        printf("start bracket %d subject=", number);
3281        pchars(eptr, 16, TRUE, md);
3282        printf("\n");
3283    #endif
3284    
3285      if (number > 0 && number < md->offset_end)      if (offset < md->offset_max)
3286        {        {
3287        save_offset1 = md->offset_vector[number];        int save_offset1 = md->offset_vector[offset];
3288        save_offset2 = md->offset_vector[number+1];        int save_offset2 = md->offset_vector[offset+1];
3289        md->offset_vector[number] = eptr - md->start_subject;        int save_offset3 = md->offset_vector[md->offset_end - number];
3290    
3291          DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
3292          md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
3293    
3294          do
3295            {
3296            if (match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup))
3297              return TRUE;
3298            ecode += (ecode[1] << 8) + ecode[2];
3299            }
3300          while (*ecode == OP_ALT);
3301    
3302        DPRINTF(("saving %d %d\n", save_offset1, save_offset2));        DPRINTF(("bracket %d failed\n", number));
3303    
3304          md->offset_vector[offset] = save_offset1;
3305          md->offset_vector[offset+1] = save_offset2;
3306          md->offset_vector[md->offset_end - number] = save_offset3;
3307          return FALSE;
3308        }        }
3309    
3310      /* Recurse for all the alternatives. */      /* Insufficient room for saving captured contents */
3311    
3312        else op = OP_BRA;
3313        }
3314    
3315      /* Other types of node can be handled by a switch */
3316    
3317      switch(op)
3318        {
3319        case OP_BRA:     /* Non-capturing bracket: optimized */
3320        DPRINTF(("start bracket 0\n"));
3321      do      do
3322        {        {
3323        if (match(eptr, ecode+3, offset_top, md)) return TRUE;        if (match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup))
3324            return TRUE;
3325        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
3326        }        }
3327      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
3328        DPRINTF(("bracket 0 failed\n"));
3329        return FALSE;
3330    
3331        /* Conditional group: compilation checked that there are no more than
3332        two branches. If the condition is false, skipping the first branch takes us
3333        past the end if there is only one branch, but that's OK because that is
3334        exactly what going to the ket would do. */
3335    
3336        case OP_COND:
3337        if (ecode[3] == OP_CREF)         /* Condition is extraction test */
3338          {
3339          int offset = ecode[4] << 1;    /* Doubled reference number */
3340          return match(eptr,
3341            ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)?
3342              5 : 3 + (ecode[1] << 8) + ecode[2]),
3343            offset_top, md, ims, eptrb, match_isgroup);
3344          }
3345    
3346      DPRINTF(("bracket %d failed\n", number/2));      /* The condition is an assertion. Call match() to evaluate it - setting
3347        the match_condassert flag causes it to stop at the end of an assertion. */
3348    
3349      if (number > 0 && number < md->offset_end)      else
3350        {        {
3351        md->offset_vector[number] = save_offset1;        if (match(eptr, ecode+3, offset_top, md, ims, NULL,
3352        md->offset_vector[number+1] = save_offset2;            match_condassert | match_isgroup))
3353            {
3354            ecode += 3 + (ecode[4] << 8) + ecode[5];
3355            while (*ecode == OP_ALT) ecode += (ecode[1] << 8) + ecode[2];
3356            }
3357          else ecode += (ecode[1] << 8) + ecode[2];
3358          return match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup);
3359        }        }
3360        /* Control never reaches here */
3361    
3362      return FALSE;      /* Skip over conditional reference data if encountered (should not be) */
     }  
3363    
3364    /* Other types of node can be handled by a switch */      case OP_CREF:
3365        ecode += 2;
3366        break;
3367    
3368        /* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched
3369        an empty string - recursion will then try other alternatives, if any. */
3370    
   switch(*ecode)  
     {  
3371      case OP_END:      case OP_END:
3372        if (md->notempty && eptr == md->start_match) return FALSE;
3373      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;          /* Record where we ended */
3374      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;   /* and how many extracts were taken */
3375      return TRUE;      return TRUE;
3376    
3377      /* The equivalent of Prolog's "cut" - if the rest doesn't match, the      /* Change option settings */
     whole thing doesn't match, so we have to get out via a longjmp(). */  
3378    
3379      case OP_CUT:      case OP_OPT:
3380      if (match(eptr, ecode+1, offset_top, md)) return TRUE;      ims = ecode[1];
3381      longjmp(md->fail_env, 1);      ecode += 2;
3382        DPRINTF(("ims set to %02lx\n", ims));
3383        break;
3384    
3385      /* Assertion brackets. Check the alternative branches in turn - the      /* Assertion brackets. Check the alternative branches in turn - the
3386      matching won't pass the KET for an assertion. If any one branch matches,      matching won't pass the KET for an assertion. If any one branch matches,
3387      the assertion is true. */      the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
3388        start of each branch to move the current point backwards, so the code at
3389        this level is identical to the lookahead case. */
3390    
3391      case OP_ASSERT:      case OP_ASSERT:
3392        case OP_ASSERTBACK:
3393      do      do
3394        {        {
3395        if (match(eptr, ecode+3, offset_top, md)) break;        if (match(eptr, ecode+3, offset_top, md, ims, NULL, match_isgroup)) break;
3396        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
3397        }        }
3398      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
3399      if (*ecode == OP_KET) return FALSE;      if (*ecode == OP_KET) return FALSE;
3400    
3401        /* If checking an assertion for a condition, return TRUE. */
3402    
3403        if ((flags & match_condassert) != 0) return TRUE;
3404    
3405      /* Continue from after the assertion, updating the offsets high water      /* Continue from after the assertion, updating the offsets high water
3406      mark, since extracts may have been taken during the assertion. */      mark, since extracts may have been taken during the assertion. */
3407    
# Line 2391  for (;;) Line 3413  for (;;)
3413      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match */
3414    
3415      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
3416        case OP_ASSERTBACK_NOT:
3417      do      do
3418        {        {
3419        if (match(eptr, ecode+3, offset_top, md)) return FALSE;        if (match(eptr, ecode+3, offset_top, md, ims, NULL, match_isgroup))
3420            return FALSE;
3421        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
3422        }        }
3423      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
3424    
3425        if ((flags & match_condassert) != 0) return TRUE;
3426    
3427      ecode += 3;      ecode += 3;
3428      continue;      continue;
3429    
3430        /* Move the subject pointer back. This occurs only at the start of
3431        each branch of a lookbehind assertion. If we are too close to the start to
3432        move back, this match function fails. */
3433    
3434        case OP_REVERSE:
3435        eptr -= (ecode[1] << 8) + ecode[2];
3436        if (eptr < md->start_subject) return FALSE;
3437        ecode += 3;
3438        break;
3439    
3440        /* Recursion matches the current regex, nested. If there are any capturing
3441        brackets started but not finished, we have to save their starting points
3442        and reinstate them after the recursion. However, we don't know how many
3443        such there are (offset_top records the completed total) so we just have
3444        to save all the potential data. There may be up to 99 such values, which
3445        is a bit large to put on the stack, but using malloc for small numbers
3446        seems expensive. As a compromise, the stack is used when there are fewer
3447        than 16 values to store; otherwise malloc is used. A problem is what to do
3448        if the malloc fails ... there is no way of returning to the top level with
3449        an error. Save the top 15 values on the stack, and accept that the rest
3450        may be wrong. */
3451    
3452        case OP_RECURSE:
3453          {
3454          BOOL rc;
3455          int *save;
3456          int stacksave[15];
3457    
3458          c = md->offset_max;
3459    
3460          if (c < 16) save = stacksave; else
3461            {
3462            save = (int *)(pcre_malloc)((c+1) * sizeof(int));
3463            if (save == NULL)
3464              {
3465              save = stacksave;
3466              c = 15;
3467              }
3468            }
3469    
3470          for (i = 1; i <= c; i++)
3471            save[i] = md->offset_vector[md->offset_end - i];
3472          rc = match(eptr, md->start_pattern, offset_top, md, ims, eptrb,
3473            match_isgroup);
3474          for (i = 1; i <= c; i++)
3475            md->offset_vector[md->offset_end - i] = save[i];
3476          if (save != stacksave) (pcre_free)(save);
3477          if (!rc) return FALSE;
3478    
3479          /* In case the recursion has set more capturing values, save the final
3480          number, then move along the subject till after the recursive match,
3481          and advance one byte in the pattern code. */
3482    
3483          offset_top = md->end_offset_top;
3484          eptr = md->end_match_ptr;
3485          ecode++;
3486          }
3487        break;
3488    
3489      /* "Once" brackets are like assertion brackets except that after a match,      /* "Once" brackets are like assertion brackets except that after a match,
3490      the point in the subject string is not moved back. Thus there can never be      the point in the subject string is not moved back. Thus there can never be
3491      a move back into the brackets. Check the alternative branches in turn - the      a move back into the brackets. Check the alternative branches in turn - the
3492      matching won't pass the KET for this kind of subpattern. If any one branch      matching won't pass the KET for this kind of subpattern. If any one branch
3493      matches, we carry on, leaving the subject pointer. */      matches, we carry on as at the end of a normal bracket, leaving the subject
3494        pointer. */
3495    
3496      case OP_ONCE:      case OP_ONCE:
     do  
3497        {        {
3498        if (match(eptr, ecode+3, offset_top, md)) break;        const uschar *prev = ecode;
3499        ecode += (ecode[1] << 8) + ecode[2];        const uschar *saved_eptr = eptr;
       }  
     while (*ecode == OP_ALT);  
     if (*ecode == OP_KET) return FALSE;  
3500    
3501      /* Continue as from after the assertion, updating the offsets high water        do
3502      mark, since extracts may have been taken. */          {
3503            if (match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup))
3504              break;
3505            ecode += (ecode[1] << 8) + ecode[2];
3506            }
3507          while (*ecode == OP_ALT);
3508    
3509      do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);        /* If hit the end of the group (which could be repeated), fail */
3510      ecode += 3;  
3511      offset_top = md->end_offset_top;        if (*ecode != OP_ONCE && *ecode != OP_ALT) return FALSE;
3512      eptr = md->end_match_ptr;  
3513      continue;        /* Continue as from after the assertion, updating the offsets high water
3514          mark, since extracts may have been taken. */
3515    
3516          do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
3517    
3518          offset_top = md->end_offset_top;
3519          eptr = md->end_match_ptr;
3520    
3521          /* For a non-repeating ket, just continue at this level. This also
3522          happens for a repeating ket if no characters were matched in the group.
3523          This is the forcible breaking of infinite loops as implemented in Perl
3524          5.005. If there is an options reset, it will get obeyed in the normal
3525          course of events. */
3526    
3527          if (*ecode == OP_KET || eptr == saved_eptr)
3528            {
3529            ecode += 3;
3530            break;
3531            }
3532    
3533          /* The repeating kets try the rest of the pattern or restart from the
3534          preceding bracket, in the appropriate order. We need to reset any options
3535          that changed within the bracket before re-running it, so check the next
3536          opcode. */
3537    
3538          if (ecode[3] == OP_OPT)
3539            {
3540            ims = (ims & ~PCRE_IMS) | ecode[4];
3541            DPRINTF(("ims set to %02lx at group repeat\n", ims));
3542            }
3543    
3544          if (*ecode == OP_KETRMIN)
3545            {
3546            if (match(eptr, ecode+3, offset_top, md, ims, eptrb, 0) ||
3547                match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup))
3548                  return TRUE;
3549            }
3550          else  /* OP_KETRMAX */
3551            {
3552            if (match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup) ||
3553                match(eptr, ecode+3, offset_top, md, ims, eptrb, 0)) return TRUE;
3554            }
3555          }
3556        return FALSE;
3557    
3558      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
3559      bracketed group and go to there. */      bracketed group and go to there. */
# Line 2440  for (;;) Line 3571  for (;;)
3571      case OP_BRAZERO:      case OP_BRAZERO:
3572        {        {
3573        const uschar *next = ecode+1;        const uschar *next = ecode+1;
3574        if (match(eptr, next, offset_top, md)) return TRUE;        if (match(eptr, next, offset_top, md, ims, eptrb, match_isgroup))
3575            return TRUE;
3576        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3577        ecode = next + 3;        ecode = next + 3;
3578        }        }
# Line 2450  for (;;) Line 3582  for (;;)
3582        {        {
3583        const uschar *next = ecode+1;        const uschar *next = ecode+1;
3584        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3585        if (match(eptr, next+3, offset_top, md)) return TRUE;        if (match(eptr, next+3, offset_top, md, ims, eptrb, match_isgroup))
3586            return TRUE;
3587        ecode++;        ecode++;
3588        }        }
3589      break;;      break;
3590    
3591      /* End of a group, repeated or non-repeating. If we are at the end of      /* End of a group, repeated or non-repeating. If we are at the end of
3592      an assertion "group", stop matching and return TRUE, but record the      an assertion "group", stop matching and return TRUE, but record the
3593      current high water mark for use by positive assertions. */      current high water mark for use by positive assertions. Do this also
3594        for the "once" (non-backtracking) groups. */
3595    
3596      case OP_KET:      case OP_KET:
3597      case OP_KETRMIN:      case OP_KETRMIN:
3598      case OP_KETRMAX:      case OP_KETRMAX:
3599        {        {
       int number;  
3600        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
3601          const uschar *saved_eptr = eptrb->saved_eptr;
3602    
3603        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)        eptrb = eptrb->prev;    /* Back up the stack of bracket start pointers */
3604    
3605          if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
3606              *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
3607              *prev == OP_ONCE)
3608          {          {
3609          md->end_match_ptr = eptr;      /* For ONCE */          md->end_match_ptr = eptr;      /* For ONCE */
3610          md->end_offset_top = offset_top;          md->end_offset_top = offset_top;
3611          return TRUE;          return TRUE;
3612          }          }
3613    
3614        /* In all other cases we have to check the group number back at the        /* In all other cases except a conditional group we have to check the
3615        start and if necessary complete handling an extraction by setting the        group number back at the start and if necessary complete handling an
3616        final offset and bumping the high water mark. */        extraction by setting the offsets and bumping the high water mark. */
3617    
3618        number = (*prev - OP_BRA) << 1;        if (*prev != OP_COND)
3619            {
3620            int number = *prev - OP_BRA;
3621            int offset = number << 1;
3622    
3623        DPRINTF(("end bracket %d\n", number/2));  #ifdef DEBUG
3624            printf("end bracket %d", number);
3625            printf("\n");
3626    #endif
3627    
3628        if (number > 0)          if (number > 0)
         {  
         if (number >= md->offset_end) md->offset_overflow = TRUE; else  
3629            {            {
3630            md->offset_vector[number+1] = eptr - md->start_subject;            if (offset >= md->offset_max) md->offset_overflow = TRUE; else
3631            if (offset_top <= number) offset_top = number + 2;              {
3632                md->offset_vector[offset] =
3633                  md->offset_vector[md->offset_end - number];
3634                md->offset_vector[offset+1] = eptr - md->start_subject;
3635                if (offset_top <= offset) offset_top = offset + 2;
3636                }
3637            }            }
3638          }          }
3639    
3640        /* For a non-repeating ket, just advance to the next node and continue at        /* Reset the value of the ims flags, in case they got changed during
3641        this level. */        the group. */
3642    
3643          ims = original_ims;
3644          DPRINTF(("ims reset to %02lx\n", ims));
3645    
3646        if (*ecode == OP_KET)        /* For a non-repeating ket, just continue at this level. This also
3647          happens for a repeating ket if no characters were matched in the group.
3648          This is the forcible breaking of infinite loops as implemented in Perl
3649          5.005. If there is an options reset, it will get obeyed in the normal
3650          course of events. */
3651    
3652          if (*ecode == OP_KET || eptr == saved_eptr)
3653          {          {
3654          ecode += 3;          ecode += 3;
3655          break;          break;
# Line 2504  for (;;) Line 3660  for (;;)
3660    
3661        if (*ecode == OP_KETRMIN)        if (*ecode == OP_KETRMIN)
3662          {          {
3663          if (match(eptr, ecode+3, offset_top, md) ||          if (match(eptr, ecode+3, offset_top, md, ims, eptrb, 0) ||
3664              match(eptr, prev, offset_top, md)) return TRUE;              match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup))
3665                  return TRUE;
3666          }          }
3667        else  /* OP_KETRMAX */        else  /* OP_KETRMAX */
3668          {          {
3669          if (match(eptr, prev, offset_top, md) ||          if (match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup) ||
3670              match(eptr, ecode+3, offset_top, md)) return TRUE;              match(eptr, ecode+3, offset_top, md, ims, eptrb, 0)) return TRUE;
3671          }          }
3672        }        }
3673      return FALSE;      return FALSE;
# Line 2519  for (;;) Line 3676  for (;;)
3676    
3677      case OP_CIRC:      case OP_CIRC:
3678      if (md->notbol && eptr == md->start_subject) return FALSE;      if (md->notbol && eptr == md->start_subject) return FALSE;
3679      if (md->multiline)      if ((ims & PCRE_MULTILINE) != 0)
3680        {        {
3681        if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;        if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;
3682        ecode++;        ecode++;
# Line 2534  for (;;) Line 3691  for (;;)
3691      ecode++;      ecode++;
3692      break;      break;
3693    
3694      /* Assert before internal newline if multiline, or before      /* Assert before internal newline if multiline, or before a terminating
3695      a terminating newline unless endonly is set, else end of subject unless      newline unless endonly is set, else end of subject unless noteol is set. */
     noteol is set. */  
3696    
3697      case OP_DOLL:      case OP_DOLL:
3698      if (md->noteol && eptr >= md->end_subject) return FALSE;      if ((ims & PCRE_MULTILINE) != 0)
     if (md->multiline)  
3699        {        {
3700        if (eptr < md->end_subject && *eptr != '\n') return FALSE;        if (eptr < md->end_subject) { if (*eptr != '\n') return FALSE; }
3701            else { if (md->noteol) return FALSE; }
3702        ecode++;        ecode++;
3703        break;        break;
3704        }        }
3705      else if (!md->endonly)      else
3706        {        {
3707        if (eptr < md->end_subject - 1 ||        if (md->noteol) return FALSE;
3708           (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;        if (!md->endonly)
3709        ecode++;          {
3710        break;          if (eptr < md->end_subject - 1 ||
3711               (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
3712    
3713            ecode++;
3714            break;
3715            }
3716        }        }
3717      /* ... else fall through */      /* ... else fall through */
3718    
3719      /* End of subject assertion */      /* End of subject assertion (\z) */
3720    
3721      case OP_EOD:      case OP_EOD:
3722      if (eptr < md->end_subject) return FALSE;      if (eptr < md->end_subject) return FALSE;
3723      ecode++;      ecode++;
3724      break;      break;
3725    
3726        /* End of subject or ending \n assertion (\Z) */
3727    
3728        case OP_EODN:
3729        if (eptr < md->end_subject - 1 ||
3730           (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
3731        ecode++;
3732        break;
3733    
3734      /* Word boundary assertions */      /* Word boundary assertions */
3735    
3736      case OP_NOT_WORD_BOUNDARY:      case OP_NOT_WORD_BOUNDARY:
3737      case OP_WORD_BOUNDARY:      case OP_WORD_BOUNDARY:
3738        {        {
3739        BOOL prev_is_word = (eptr != md->start_subject) &&        BOOL prev_is_word = (eptr != md->start_subject) &&
3740          ((pcre_ctypes[eptr[-1]] & ctype_word) != 0);          ((md->ctypes[eptr[-1]] & ctype_word) != 0);
3741        BOOL cur_is_word = (eptr < md->end_subject) &&        BOOL cur_is_word = (eptr < md->end_subject) &&
3742          ((pcre_ctypes[*eptr] & ctype_word) != 0);          ((md->ctypes[*eptr] & ctype_word) != 0);
3743        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
3744             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
3745          return FALSE;          return FALSE;
# Line 2580  for (;;) Line 3749  for (;;)
3749      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
3750    
3751      case OP_ANY:      case OP_ANY:
3752      if (!md->dotall && eptr < md->end_subject && *eptr == '\n') return FALSE;      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n')
3753          return FALSE;
3754      if (eptr++ >= md->end_subject) return FALSE;      if (eptr++ >= md->end_subject) return FALSE;
3755      ecode++;      ecode++;
3756      break;      break;
3757    
3758      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
3759      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) != 0)      if (eptr >= md->end_subject ||
3760           (md->ctypes[*eptr++] & ctype_digit) != 0)
3761        return FALSE;        return FALSE;
3762      ecode++;      ecode++;
3763      break;      break;
3764    
3765      case OP_DIGIT:      case OP_DIGIT:
3766      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) == 0)      if (eptr >= md->end_subject ||
3767           (md->ctypes[*eptr++] & ctype_digit) == 0)
3768        return FALSE;        return FALSE;
3769      ecode++;      ecode++;
3770      break;      break;
3771    
3772      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
3773      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) != 0)      if (eptr >= md->end_subject ||
3774           (md->ctypes[*eptr++] & ctype_space) != 0)
3775        return FALSE;        return FALSE;
3776      ecode++;      ecode++;
3777      break;      break;
3778    
3779      case OP_WHITESPACE:      case OP_WHITESPACE:
3780      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) == 0)      if (eptr >= md->end_subject ||
3781           (md->ctypes[*eptr++] & ctype_space) == 0)
3782        return FALSE;        return FALSE;
3783      ecode++;      ecode++;
3784      break;      break;
3785    
3786      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
3787      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) != 0)      if (eptr >= md->end_subject ||
3788           (md->ctypes[*eptr++] & ctype_word) != 0)
3789        return FALSE;        return FALSE;
3790      ecode++;      ecode++;
3791      break;      break;
3792    
3793      case OP_WORDCHAR:      case OP_WORDCHAR:
3794      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) == 0)      if (eptr >= md->end_subject ||
3795           (md->ctypes[*eptr++] & ctype_word) == 0)
3796        return FALSE;        return FALSE;
3797      ecode++;      ecode++;
3798      break;      break;
# Line 2632  for (;;) Line 3808  for (;;)
3808      case OP_REF:      case OP_REF:
3809        {        {
3810        int length;        int length;
3811        int number = ecode[1] << 1;                /* Doubled reference number */        int offset = ecode[1] << 1;                /* Doubled reference number */
3812        ecode += 2;                                /* Advance past the item */        ecode += 2;                                /* Advance past the item */
3813    
3814        if (number >= offset_top || md->offset_vector[number] < 0)        /* If the reference is unset, set the length to be longer than the amount
3815          {        of subject left; this ensures that every attempt at a match fails. We
3816          md->errorcode = PCRE_ERROR_BADREF;        can't just fail here, because of the possibility of quantifiers with zero
3817          return FALSE;        minima. */
3818          }  
3819          length = (offset >= offset_top || md->offset_vector[offset] < 0)?
3820            md->end_subject - eptr + 1 :
3821            md->offset_vector[offset+1] - md->offset_vector[offset];
3822    
3823        length = md->offset_vector[number+1] - md->offset_vector[number];        /* Set up for repetition, or handle the non-repeated case */
3824    
3825        switch (*ecode)        switch (*ecode)
3826          {          {
# Line 2668  for (;;) Line 3847  for (;;)
3847          break;          break;
3848    
3849          default:               /* No repeat follows */          default:               /* No repeat follows */
3850          if (!match_ref(number, eptr, length, md)) return FALSE;          if (!match_ref(offset, eptr, length, md, ims)) return FALSE;
3851          eptr += length;          eptr += length;
3852          continue;              /* With the main loop */          continue;              /* With the main loop */
3853          }          }
# Line 2684  for (;;) Line 3863  for (;;)
3863    
3864        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3865          {          {
3866          if (!match_ref(number, eptr, length, md)) return FALSE;          if (!match_ref(offset, eptr, length, md, ims)) return FALSE;
3867          eptr += length;          eptr += length;
3868          }          }
3869    
# Line 2699  for (;;) Line 3878  for (;;)
3878          {          {
3879          for (i = min;; i++)          for (i = min;; i++)
3880            {            {
3881            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
3882            if (i >= max || !match_ref(number, eptr, length, md))              return TRUE;
3883              if (i >= max || !match_ref(offset, eptr, length, md, ims))
3884              return FALSE;              return FALSE;
3885            eptr += length;            eptr += length;
3886            }            }
# Line 2714  for (;;) Line 3894  for (;;)
3894          const uschar *pp = eptr;          const uschar *pp = eptr;
3895          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3896            {            {
3897            if (!match_ref(number, eptr, length, md)) break;            if (!match_ref(offset, eptr, length, md, ims)) break;
3898            eptr += length;            eptr += length;
3899            }            }
3900          while (eptr >= pp)          while (eptr >= pp)
3901            {            {
3902            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
3903                return TRUE;
3904            eptr -= length;            eptr -= length;
3905            }            }
3906          return FALSE;          return FALSE;
# Line 2727  for (;;) Line 3908  for (;;)
3908        }        }
3909      /* Control never gets here */      /* Control never gets here */
3910    
3911    
3912    
3913      /* Match a character class, possibly repeatedly. Look past the end of the      /* Match a character class, possibly repeatedly. Look past the end of the
3914      item to see if there is repeat information following. Then obey similar      item to see if there is repeat information following. Then obey similar
3915      code to character type repeats - written out again for speed. If caseless      code to character type repeats - written out again for speed. */
     matching was set at runtime but not at compile time, we have to check both  
     versions of a character, and we have to behave differently for positive and  
     negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are  
     treated differently. */  
3916    
3917      case OP_CLASS:      case OP_CLASS:
     case OP_NEGCLASS:  
3918        {        {
       BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless;  
3919        const uschar *data = ecode + 1;  /* Save for matching */        const uschar *data = ecode + 1;  /* Save for matching */
3920        ecode += 33;                     /* Advance past the item */        ecode += 33;                     /* Advance past the item */
3921    
# Line 2777  for (;;) Line 3954  for (;;)
3954          {          {
3955          if (eptr >= md->end_subject) return FALSE;          if (eptr >= md->end_subject) return FALSE;
3956          c = *eptr++;          c = *eptr++;
3957            if ((data[c/8] & (1 << (c&7))) != 0) continue;
         /* Either not runtime caseless, or it was a positive class. For  
         runtime caseless, continue if either case is in the map. */  
   
         if (!nasty_case)  
           {  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  
           if (md->runtime_caseless)  
             {  
             c = pcre_fcc[c];  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             }  
           }  
   
         /* Runtime caseless and it was a negative class. Continue only if  
         both cases are in the map. */  
   
         else  
           {  
           if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;  
           c = pcre_fcc[c];  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  
           }  
   
3958          return FALSE;          return FALSE;
3959          }          }
3960    
# Line 2816  for (;;) Line 3970  for (;;)
3970          {          {
3971          for (i = min;; i++)          for (i = min;; i++)
3972            {            {
3973            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
3974                return TRUE;
3975            if (i >= max || eptr >= md->end_subject) return FALSE;            if (i >= max || eptr >= md->end_subject) return FALSE;
3976            c = *eptr++;            c = *eptr++;
3977              if ((data[c/8] & (1 << (c&7))) != 0) continue;
           /* Either not runtime caseless, or it was a positive class. For  
           runtime caseless, continue if either case is in the map. */  
   
           if (!nasty_case)  
             {  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             if (md->runtime_caseless)  
               {  
               c = pcre_fcc[c];  
               if ((data[c/8] & (1 << (c&7))) != 0) continue;  
               }  
             }  
   
           /* Runtime caseless and it was a negative class. Continue only if  
           both cases are in the map. */  
   
           else  
             {  
             if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;  
             c = pcre_fcc[c];  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             }  
   
3978            return FALSE;            return FALSE;
3979            }            }
3980          /* Control never gets here */          /* Control never gets here */
# Line 2857  for (;;) Line 3989  for (;;)
3989            {            {
3990            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3991            c = *eptr;            c = *eptr;
3992              if ((data[c/8] & (1 << (c&7))) != 0) continue;
           /* Either not runtime caseless, or it was a positive class. For  
           runtime caseless, continue if either case is in the map. */  
   
           if (!nasty_case)  
             {  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             if (md->runtime_caseless)  
               {  
               c = pcre_fcc[c];  
               if ((data[c/8] & (1 << (c&7))) != 0) continue;  
               }  
             }  
   
           /* Runtime caseless and it was a negative class. Continue only if  
           both cases are in the map. */  
   
           else  
             {  
             if ((data[c/8] & (1 << (c&7))) == 0) break;  
             c = pcre_fcc[c];  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             }  
   
3993            break;            break;
3994            }            }
3995    
3996          while (eptr >= pp)          while (eptr >= pp)
3997            if (match(eptr--, ecode, offset_top, md)) return TRUE;            if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
3998                return TRUE;
3999          return FALSE;          return FALSE;
4000          }          }
4001        }        }
# Line 2912  for (;;) Line 4022  for (;;)
4022  #endif  #endif
4023    
4024        if (length > md->end_subject - eptr) return FALSE;        if (length > md->end_subject - eptr) return FALSE;
4025        if (md->caseless)        if ((ims & PCRE_CASELESS) != 0)
4026          {          {
4027          while (length-- > 0) if (pcre_lcc[*ecode++] != pcre_lcc[*eptr++]) return FALSE;          while (length-- > 0)
4028              if (md->lcc[*ecode++] != md->lcc[*eptr++])
4029                return FALSE;
4030          }          }
4031        else        else
4032          {          {
# Line 2969  for (;;) Line 4081  for (;;)
4081      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
4082        max, eptr));        max, eptr));
4083    
4084      if (md->caseless)      if ((ims & PCRE_CASELESS) != 0)
4085        {        {
4086        c = pcre_lcc[c];        c = md->lcc[c];
4087        for (i = 1; i <= min; i++) if (c != pcre_lcc[*eptr++]) return FALSE;        for (i = 1; i <= min; i++)
4088            if (c != md->lcc[*eptr++]) return FALSE;
4089        if (min == max) continue;        if (min == max) continue;
4090        if (minimize)        if (minimize)
4091          {          {
4092          for (i = min;; i++)          for (i = min;; i++)
4093            {            {
4094            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
4095            if (i >= max || eptr >= md->end_subject || c != pcre_lcc[*eptr++])              return TRUE;
4096              if (i >= max || eptr >= md->end_subject ||
4097                  c != md->lcc[*eptr++])
4098              return FALSE;              return FALSE;
4099            }            }
4100          /* Control never gets here */          /* Control never gets here */
# Line 2989  for (;;) Line 4104  for (;;)
4104          const uschar *pp = eptr;          const uschar *pp = eptr;
4105          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4106            {            {
4107            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c != md->lcc[*eptr]) break;
4108            eptr++;            eptr++;
4109            }            }
4110          while (eptr >= pp)          while (eptr >= pp)
4111            if (match(eptr--, ecode, offset_top, md)) return TRUE;            if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
4112                return TRUE;
4113          return FALSE;          return FALSE;
4114          }          }
4115        /* Control never gets here */        /* Control never gets here */
# Line 3009  for (;;) Line 4125  for (;;)
4125          {          {
4126          for (i = min;; i++)          for (i = min;; i++)
4127            {            {
4128            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
4129                return TRUE;
4130            if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;            if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;
4131            }            }
4132          /* Control never gets here */          /* Control never gets here */
# Line 3023  for (;;) Line 4140  for (;;)
4140            eptr++;            eptr++;
4141            }            }
4142          while (eptr >= pp)          while (eptr >= pp)
4143           if (match(eptr--, ecode, offset_top, md)) return TRUE;           if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
4144               return TRUE;
4145          return FALSE;          return FALSE;
4146          }          }
4147        }        }
# Line 3034  for (;;) Line 4152  for (;;)
4152      case OP_NOT:      case OP_NOT:
4153      if (eptr >= md->end_subject) return FALSE;      if (eptr >= md->end_subject) return FALSE;
4154      ecode++;      ecode++;
4155      if (md->caseless)      if ((ims & PCRE_CASELESS) != 0)
4156        {        {
4157        if (pcre_lcc[*ecode++] == pcre_lcc[*eptr++]) return FALSE;        if (md->lcc[*ecode++] == md->lcc[*eptr++]) return FALSE;
4158        }        }
4159      else      else
4160        {        {
# Line 3094  for (;;) Line 4212  for (;;)
4212      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
4213        max, eptr));        max, eptr));
4214    
4215      if (md->caseless)      if ((ims & PCRE_CASELESS) != 0)
4216        {        {
4217        c = pcre_lcc[c];        c = md->lcc[c];
4218        for (i = 1; i <= min; i++) if (c == pcre_lcc[*eptr++]) return FALSE;        for (i = 1; i <= min; i++)
4219            if (c == md->lcc[*eptr++]) return FALSE;
4220        if (min == max) continue;        if (min == max) continue;
4221        if (minimize)        if (minimize)
4222          {          {
4223          for (i = min;; i++)          for (i = min;; i++)
4224            {            {
4225            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
4226            if (i >= max || eptr >= md->end_subject || c == pcre_lcc[*eptr++])              return TRUE;
4227              if (i >= max || eptr >= md->end_subject ||
4228                  c == md->lcc[*eptr++])
4229              return FALSE;              return FALSE;
4230            }            }
4231          /* Control never gets here */          /* Control never gets here */
# Line 3114  for (;;) Line 4235  for (;;)
4235          const uschar *pp = eptr;          const uschar *pp = eptr;
4236          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4237            {            {
4238            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c == md->lcc[*eptr]) break;
4239            eptr++;            eptr++;
4240            }            }
4241          while (eptr >= pp)          while (eptr >= pp)
4242            if (match(eptr--, ecode, offset_top, md)) return TRUE;            if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
4243                return TRUE;
4244          return FALSE;          return FALSE;
4245          }          }
4246        /* Control never gets here */        /* Control never gets here */
# Line 3134  for (;;) Line 4256  for (;;)
4256          {          {
4257          for (i = min;; i++)          for (i = min;; i++)
4258            {            {
4259            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
4260                return TRUE;
4261            if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;            if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;
4262            }            }
4263          /* Control never gets here */          /* Control never gets here */
# Line 3148  for (;;) Line 4271  for (;;)
4271            eptr++;            eptr++;
4272            }            }
4273          while (eptr >= pp)          while (eptr >= pp)
4274           if (match(eptr--, ecode, offset_top, md)) return TRUE;           if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
4275               return TRUE;
4276          return FALSE;          return FALSE;
4277          }          }
4278        }        }
# Line 3198  for (;;) Line 4322  for (;;)
4322      if (min > 0) switch(ctype)      if (min > 0) switch(ctype)
4323        {        {
4324        case OP_ANY:        case OP_ANY:
4325        if (!md->dotall)        if ((ims & PCRE_DOTALL) == 0)
4326          { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }          { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }
4327        else eptr += min;        else eptr += min;
4328        break;        break;
4329    
4330        case OP_NOT_DIGIT:        case OP_NOT_DIGIT:
4331        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
4332          if ((pcre_ctypes[*eptr++] & ctype_digit) != 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_digit) != 0) return FALSE;
4333        break;        break;
4334    
4335        case OP_DIGIT:        case OP_DIGIT:
4336        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)