/[pcre]/code/trunk/pcre.c
ViewVC logotype

Diff of /code/trunk/pcre.c

Parent Directory | Revision Log | View Patch

revision 15 by nigel, Sat Feb 24 21:38:25 2007 UTC revision 39 by nigel, Sat Feb 24 21:39:13 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1998 University of Cambridge             Copyright (c) 1997-1999 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 25  restrictions: Line 25  restrictions:
25    
26  3. Altered versions must be plainly marked as such, and must not be  3. Altered versions must be plainly marked as such, and must not be
27     misrepresented as being the original software.     misrepresented as being the original software.
28    
29    4. If PCRE is embedded in any software that is released under the GNU
30       General Purpose Licence (GPL), then the terms of that licence shall
31       supersede any condition above with which it is incompatible.
32  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
33  */  */
34    
# Line 33  restrictions: Line 37  restrictions:
37    
38  /* #define DEBUG */  /* #define DEBUG */
39    
40  /* Use a macro for debugging printing, 'cause that eliminates the the use  /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
41  of #ifdef inline, and there are *still* stupid compilers about that don't like  inline, and there are *still* stupid compilers about that don't like indented
42  indented pre-processor statements. I suppose it's only been 10 years... */  pre-processor statements. I suppose it's only been 10 years... */
43    
44  #ifdef DEBUG  #ifdef DEBUG
45  #define DPRINTF(p) printf p  #define DPRINTF(p) printf p
# Line 56  the external pcre header. */ Line 60  the external pcre header. */
60  #endif  #endif
61    
62    
63    /* Number of items on the nested bracket stacks at compile time. This should
64    not be set greater than 200. */
65    
66    #define BRASTACK_SIZE 200
67    
68    
69  /* Min and max values for the common repeats; for the maxima, 0 => infinity */  /* Min and max values for the common repeats; for the maxima, 0 => infinity */
70    
71  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
# Line 66  static const char rep_max[] = { 0, 0, 0, Line 76  static const char rep_max[] = { 0, 0, 0,
76  #ifdef DEBUG  #ifdef DEBUG
77  static const char *OP_names[] = {  static const char *OP_names[] = {
78    "End", "\\A", "\\B", "\\b", "\\D", "\\d",    "End", "\\A", "\\B", "\\b", "\\D", "\\d",
79    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
80    "not",    "Opt", "^", "$", "Any", "chars", "not",
81    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
82    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
83    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
84    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
85    "class", "negclass", "Ref",    "class", "Ref",
86    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
87      "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
88    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
89  };  };
90  #endif  #endif
# Line 93  static const short int escapes[] = { Line 104  static const short int escapes[] = {
104    '`',      7, -ESC_b,      0, -ESC_d,     27,   '\f',      0,   /* ` - g */    '`',      7, -ESC_b,      0, -ESC_d,     27,   '\f',      0,   /* ` - g */
105      0,      0,      0,      0,      0,      0,   '\n',      0,   /* h - o */      0,      0,      0,      0,      0,      0,   '\n',      0,   /* h - o */
106      0,      0,   '\r', -ESC_s,   '\t',      0,      0, -ESC_w,   /* p - w */      0,      0,   '\r', -ESC_s,   '\t',      0,      0, -ESC_w,   /* p - w */
107      0,      0,      0                                            /* x - z */      0,      0, -ESC_z                                            /* x - z */
108  };  };
109    
110  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
111    
112  static BOOL  static BOOL
113    compile_regex(int, int *, uschar **, const uschar **, const char **);    compile_regex(int, int, int *, uschar **, const uschar **, const char **,
114        BOOL, int, int *, int *, compile_data *);
 /* Structure for passing "static" information around between the functions  
 doing the matching, so that they are thread-safe. */  
   
 typedef struct match_data {  
   int    errorcode;             /* As it says */  
   int   *offset_vector;         /* Offset vector */  
   int    offset_end;            /* One past the end */  
   BOOL   offset_overflow;       /* Set if too many extractions */  
   BOOL   caseless;              /* Case-independent flag */  
   BOOL   runtime_caseless;      /* Caseless forced at run time */  
   BOOL   multiline;             /* Multiline flag */  
   BOOL   notbol;                /* NOTBOL flag */  
   BOOL   noteol;                /* NOTEOL flag */  
   BOOL   dotall;                /* Dot matches any char */  
   BOOL   endonly;               /* Dollar not before final \n */  
   const uschar *start_subject;  /* Start of the subject string */  
   const uschar *end_subject;    /* End of the subject string */  
   jmp_buf fail_env;             /* Environment for longjump() break out */  
   const uschar *end_match_ptr;  /* Subject position at end match */  
   int     end_offset_top;       /* Highwater mark at end of match */  
 } match_data;  
115    
116    
117    
# Line 141  void  (*pcre_free)(void *) = free; Line 131  void  (*pcre_free)(void *) = free;
131    
132    
133  /*************************************************  /*************************************************
134    *             Default character tables           *
135    *************************************************/
136    
137    /* A default set of character tables is included in the PCRE binary. Its source
138    is built by the maketables auxiliary program, which uses the default C ctypes
139    functions, and put in the file chartables.c. These tables are used by PCRE
140    whenever the caller of pcre_compile() does not provide an alternate set of
141    tables. */
142    
143    #include "chartables.c"
144    
145    
146    
147    /*************************************************
148  *          Return version string                 *  *          Return version string                 *
149  *************************************************/  *************************************************/
150    
151    #define STRING(a)  # a
152    #define XSTRING(s) STRING(s)
153    
154  const char *  const char *
155  pcre_version(void)  pcre_version(void)
156  {  {
157  return PCRE_VERSION;  return XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE);
158  }  }
159    
160    
# Line 158  return PCRE_VERSION; Line 165  return PCRE_VERSION;
165  *************************************************/  *************************************************/
166    
167  /* This function picks potentially useful data out of the private  /* This function picks potentially useful data out of the private
168  structure.  structure. The public options are passed back in an int - though the
169    re->options field has been expanded to a long int, all the public options
170    are at the low end of it, and so even on 16-bit systems this will still be OK.
171    Therefore, I haven't changed the API for pcre_info().
172    
173  Arguments:  Arguments:
174    external_re   points to compiled code    external_re   points to compiled code
# Line 177  pcre_info(const pcre *external_re, int * Line 187  pcre_info(const pcre *external_re, int *
187  const real_pcre *re = (const real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
188  if (re == NULL) return PCRE_ERROR_NULL;  if (re == NULL) return PCRE_ERROR_NULL;
189  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
190  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);  if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);
191  if (first_char != NULL)  if (first_char != NULL)
192    *first_char = ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :    *first_char = ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :
193       ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;       ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
# Line 218  while (length-- > 0) Line 228  while (length-- > 0)
228    
229    
230  /*************************************************  /*************************************************
 *         Check subpattern for empty operand     *  
 *************************************************/  
   
 /* This function checks a bracketed subpattern to see if any of the paths  
 through it could match an empty string. This is used to diagnose an error if  
 such a subpattern is followed by a quantifier with an unlimited upper bound.  
   
 Argument:  
   code      points to the opening bracket  
   
 Returns:    TRUE or FALSE  
 */  
   
 static BOOL  
 could_be_empty(uschar *code)  
 {  
 do {  
   uschar *cc = code + 3;  
   
   /* Scan along the opcodes for this branch; as soon as we find something  
   that matches a non-empty string, break out and advance to test the next  
   branch. If we get to the end of the branch, return TRUE for the whole  
   sub-expression. */  
   
   for (;;)  
     {  
     /* Test an embedded subpattern; if it could not be empty, break the  
     loop. Otherwise carry on in the branch. */  
   
     if ((int)(*cc) >= OP_BRA || (int)(*cc) == OP_ONCE)  
       {  
       if (!could_be_empty(cc)) break;  
       do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);  
       cc += 3;  
       }  
   
     else switch (*cc)  
       {  
       /* Reached end of a branch: the subpattern may match the empty string */  
   
       case OP_ALT:  
       case OP_KET:  
       case OP_KETRMAX:  
       case OP_KETRMIN:  
       return TRUE;  
   
       /* Skip over assertive subpatterns */  
   
       case OP_ASSERT:  
       case OP_ASSERT_NOT:  
       do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);  
       cc += 3;  
       break;  
   
       /* Skip over things that don't match chars */  
   
       case OP_SOD:  
       case OP_EOD:  
       case OP_CIRC:  
       case OP_DOLL:  
       case OP_BRAZERO:  
       case OP_BRAMINZERO:  
       case OP_NOT_WORD_BOUNDARY:  
       case OP_WORD_BOUNDARY:  
       cc++;  
       break;  
   
       /* Skip over simple repeats with zero lower bound */  
   
       case OP_STAR:  
       case OP_MINSTAR:  
       case OP_QUERY:  
       case OP_MINQUERY:  
       case OP_NOTSTAR:  
       case OP_NOTMINSTAR:  
       case OP_NOTQUERY:  
       case OP_NOTMINQUERY:  
       case OP_TYPESTAR:  
       case OP_TYPEMINSTAR:  
       case OP_TYPEQUERY:  
       case OP_TYPEMINQUERY:  
       cc += 2;  
       break;  
   
       /* Skip over UPTOs (lower bound is zero) */  
   
       case OP_UPTO:  
       case OP_MINUPTO:  
       case OP_TYPEUPTO:  
       case OP_TYPEMINUPTO:  
       cc += 4;  
       break;  
   
       /* Check a class or a back reference for a zero minimum */  
   
       case OP_CLASS:  
       case OP_NEGCLASS:  
       case OP_REF:  
       cc += (*cc == OP_REF)? 2 : 33;  
   
       switch (*cc)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         cc++;  
         break;  
   
         case OP_CRRANGE:  
         case OP_CRMINRANGE:  
         if ((cc[1] << 8) + cc[2] != 0) goto NEXT_BRANCH;  
         cc += 3;  
         break;  
   
         default:  
         goto NEXT_BRANCH;  
         }  
       break;  
   
       /* Anything else matches at least one character */  
   
       default:  
       goto NEXT_BRANCH;  
       }  
     }  
   
   NEXT_BRANCH:  
   code += (code[1] << 8) + code[2];  
   }  
 while (*code == OP_ALT);  
   
 /* No branches match the empty string */  
   
 return FALSE;  
 }  
   
   
   
 /*************************************************  
231  *            Handle escapes                      *  *            Handle escapes                      *
232  *************************************************/  *************************************************/
233    
# Line 373  Arguments: Line 243  Arguments:
243    bracount   number of previous extracting brackets    bracount   number of previous extracting brackets
244    options    the options bits    options    the options bits
245    isclass    TRUE if inside a character class    isclass    TRUE if inside a character class
246      cd         pointer to char tables block
247    
248  Returns:     zero or positive => a data character  Returns:     zero or positive => a data character
249               negative => a special escape sequence               negative => a special escape sequence
# Line 381  Returns:     zero or positive => a data Line 252  Returns:     zero or positive => a data
252    
253  static int  static int
254  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
255    int options, BOOL isclass)    int options, BOOL isclass, compile_data *cd)
256  {  {
257  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
258  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */
# Line 424  else Line 295  else
295        {        {
296        oldptr = ptr;        oldptr = ptr;
297        c -= '0';        c -= '0';
298        while ((pcre_ctypes[ptr[1]] & ctype_digit) != 0)        while ((cd->ctypes[ptr[1]] & ctype_digit) != 0)
299          c = c * 10 + *(++ptr) - '0';          c = c * 10 + *(++ptr) - '0';
300        if (c < 10 || c <= bracount)        if (c < 10 || c <= bracount)
301          {          {
# Line 450  else Line 321  else
321    
322      case '0':      case '0':
323      c -= '0';      c -= '0';
324      while(i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_digit) != 0 &&      while(i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 &&
325        ptr[1] != '8' && ptr[1] != '9')        ptr[1] != '8' && ptr[1] != '9')
326          c = c * 8 + *(++ptr) - '0';          c = c * 8 + *(++ptr) - '0';
327      break;      break;
# Line 459  else Line 330  else
330    
331      case 'x':      case 'x':
332      c = 0;      c = 0;
333      while (i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_xdigit) != 0)      while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0)
334        {        {
335        ptr++;        ptr++;
336        c = c * 16 + pcre_lcc[*ptr] -        c = c * 16 + cd->lcc[*ptr] -
337          (((pcre_ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');          (((cd->ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');
338        }        }
339      break;      break;
340    
# Line 477  else Line 348  else
348    
349      /* A letter is upper-cased; then the 0x40 bit is flipped */      /* A letter is upper-cased; then the 0x40 bit is flipped */
350    
351      if (c >= 'a' && c <= 'z') c = pcre_fcc[c];      if (c >= 'a' && c <= 'z') c = cd->fcc[c];
352      c ^= 0x40;      c ^= 0x40;
353      break;      break;
354    
355      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
356      other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,      other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,
357      for Perl compatibility, it is a literal. */      for Perl compatibility, it is a literal. This code looks a bit odd, but
358        there used to be some cases other than the default, and there may be again
359        in future, so I haven't "optimized" it. */
360    
361      default:      default:
362      if ((options & PCRE_EXTRA) != 0) switch(c)      if ((options & PCRE_EXTRA) != 0) switch(c)
363        {        {
       case 'X':  
       c = -ESC_X;      /* This could be a lookup if it ever got into Perl */  
       break;  
   
364        default:        default:
365        *errorptr = ERR3;        *errorptr = ERR3;
366        break;        break;
# Line 517  where the ddds are digits. Line 386  where the ddds are digits.
386    
387  Arguments:  Arguments:
388    p         pointer to the first char after '{'    p         pointer to the first char after '{'
389      cd        pointer to char tables block
390    
391  Returns:    TRUE or FALSE  Returns:    TRUE or FALSE
392  */  */
393    
394  static BOOL  static BOOL
395  is_counted_repeat(const uschar *p)  is_counted_repeat(const uschar *p, compile_data *cd)
396  {  {
397  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
398  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
399  if (*p == '}') return TRUE;  if (*p == '}') return TRUE;
400    
401  if (*p++ != ',') return FALSE;  if (*p++ != ',') return FALSE;
402  if (*p == '}') return TRUE;  if (*p == '}') return TRUE;
403    
404  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
405  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
406  return (*p == '}');  return (*p == '}');
407  }  }
408    
# Line 552  Arguments: Line 422  Arguments:
422    maxp       pointer to int for max    maxp       pointer to int for max
423               returned as -1 if no max               returned as -1 if no max
424    errorptr   points to pointer to error message    errorptr   points to pointer to error message
425      cd         pointer to character tables block
426    
427  Returns:     pointer to '}' on success;  Returns:     pointer to '}' on success;
428               current ptr on error, with errorptr set               current ptr on error, with errorptr set
429  */  */
430    
431  static const uschar *  static const uschar *
432  read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr)  read_repeat_counts(const uschar *p, int *minp, int *maxp,
433      const char **errorptr, compile_data *cd)
434  {  {
435  int min = 0;  int min = 0;
436  int max = -1;  int max = -1;
437    
438  while ((pcre_ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';  while ((cd->ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';
439    
440  if (*p == '}') max = min; else  if (*p == '}') max = min; else
441    {    {
442    if (*(++p) != '}')    if (*(++p) != '}')
443      {      {
444      max = 0;      max = 0;
445      while((pcre_ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';      while((cd->ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';
446      if (max < min)      if (max < min)
447        {        {
448        *errorptr = ERR4;        *errorptr = ERR4;
# Line 595  return p; Line 467  return p;
467    
468    
469  /*************************************************  /*************************************************
470    *        Find the fixed length of a pattern      *
471    *************************************************/
472    
473    /* Scan a pattern and compute the fixed length of subject that will match it,
474    if the length is fixed. This is needed for dealing with backward assertions.
475    
476    Arguments:
477      code     points to the start of the pattern (the bracket)
478    
479    Returns:   the fixed length, or -1 if there is no fixed length
480    */
481    
482    static int
483    find_fixedlength(uschar *code)
484    {
485    int length = -1;
486    
487    register int branchlength = 0;
488    register uschar *cc = code + 3;
489    
490    /* Scan along the opcodes for this branch. If we get to the end of the
491    branch, check the length against that of the other branches. */
492    
493    for (;;)
494      {
495      int d;
496      register int op = *cc;
497      if (op >= OP_BRA) op = OP_BRA;
498    
499      switch (op)
500        {
501        case OP_BRA:
502        case OP_ONCE:
503        case OP_COND:
504        d = find_fixedlength(cc);
505        if (d < 0) return -1;
506        branchlength += d;
507        do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
508        cc += 3;
509        break;
510    
511        /* Reached end of a branch; if it's a ket it is the end of a nested
512        call. If it's ALT it is an alternation in a nested call. If it is
513        END it's the end of the outer call. All can be handled by the same code. */
514    
515        case OP_ALT:
516        case OP_KET:
517        case OP_KETRMAX:
518        case OP_KETRMIN:
519        case OP_END:
520        if (length < 0) length = branchlength;
521          else if (length != branchlength) return -1;
522        if (*cc != OP_ALT) return length;
523        cc += 3;
524        branchlength = 0;
525        break;
526    
527        /* Skip over assertive subpatterns */
528    
529        case OP_ASSERT:
530        case OP_ASSERT_NOT:
531        case OP_ASSERTBACK:
532        case OP_ASSERTBACK_NOT:
533        do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
534        cc += 3;
535        break;
536    
537        /* Skip over things that don't match chars */
538    
539        case OP_REVERSE:
540        cc++;
541        /* Fall through */
542    
543        case OP_CREF:
544        case OP_OPT:
545        cc++;
546        /* Fall through */
547    
548        case OP_SOD:
549        case OP_EOD:
550        case OP_EODN:
551        case OP_CIRC:
552        case OP_DOLL:
553        case OP_NOT_WORD_BOUNDARY:
554        case OP_WORD_BOUNDARY:
555        cc++;
556        break;
557    
558        /* Handle char strings */
559    
560        case OP_CHARS:
561        branchlength += *(++cc);
562        cc += *cc + 1;
563        break;
564    
565        /* Handle exact repetitions */
566    
567        case OP_EXACT:
568        case OP_TYPEEXACT:
569        branchlength += (cc[1] << 8) + cc[2];
570        cc += 4;
571        break;
572    
573        /* Handle single-char matchers */
574    
575        case OP_NOT_DIGIT:
576        case OP_DIGIT:
577        case OP_NOT_WHITESPACE:
578        case OP_WHITESPACE:
579        case OP_NOT_WORDCHAR:
580        case OP_WORDCHAR:
581        case OP_ANY:
582        branchlength++;
583        cc++;
584        break;
585    
586    
587        /* Check a class for variable quantification */
588    
589        case OP_CLASS:
590        cc += (*cc == OP_REF)? 2 : 33;
591    
592        switch (*cc)
593          {
594          case OP_CRSTAR:
595          case OP_CRMINSTAR:
596          case OP_CRQUERY:
597          case OP_CRMINQUERY:
598          return -1;
599    
600          case OP_CRRANGE:
601          case OP_CRMINRANGE:
602          if ((cc[1] << 8) + cc[2] != (cc[3] << 8) + cc[4]) return -1;
603          branchlength += (cc[1] << 8) + cc[2];
604          cc += 5;
605          break;
606    
607          default:
608          branchlength++;
609          }
610        break;
611    
612        /* Anything else is variable length */
613    
614        default:
615        return -1;
616        }
617      }
618    /* Control never gets here */
619    }
620    
621    
622    
623    
624    /*************************************************
625  *           Compile one branch                   *  *           Compile one branch                   *
626  *************************************************/  *************************************************/
627    
628  /* Scan the pattern, compiling it into the code vector.  /* Scan the pattern, compiling it into the code vector.
629    
630  Arguments:  Arguments:
631    options    the option bits    options      the option bits
632    bracket    points to number of brackets used    brackets     points to number of brackets used
633    code       points to the pointer to the current code point    code         points to the pointer to the current code point
634    ptrptr     points to the current pattern pointer    ptrptr       points to the current pattern pointer
635    errorptr   points to pointer to error message    errorptr     points to pointer to error message
636      optchanged   set to the value of the last OP_OPT item compiled
637      reqchar      set to the last literal character required, else -1
638      countlits    set to count of mandatory literal characters
639      cd           contains pointers to tables
640    
641  Returns:     TRUE on success  Returns:       TRUE on success
642               FALSE, with *errorptr set on error                 FALSE, with *errorptr set on error
643  */  */
644    
645  static BOOL  static BOOL
646  compile_branch(int options, int *brackets, uschar **codeptr,  compile_branch(int options, int *brackets, uschar **codeptr,
647    const uschar **ptrptr, const char **errorptr)    const uschar **ptrptr, const char **errorptr, int *optchanged,
648      int *reqchar, int *countlits, compile_data *cd)
649  {  {
650  int repeat_type, op_type;  int repeat_type, op_type;
651  int repeat_min, repeat_max;  int repeat_min, repeat_max;
652  int bravalue, length;  int bravalue, length;
653    int greedy_default, greedy_non_default;
654    int prevreqchar;
655    int condcount = 0;
656    int subcountlits = 0;
657  register int c;  register int c;
658  register uschar *code = *codeptr;  register uschar *code = *codeptr;
659    uschar *tempcode;
660  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
661  const uschar *oldptr;  const uschar *tempptr;
662  uschar *previous = NULL;  uschar *previous = NULL;
663  uschar class[32];  uschar class[32];
664    
665    /* Set up the default and non-default settings for greediness */
666    
667    greedy_default = ((options & PCRE_UNGREEDY) != 0);
668    greedy_non_default = greedy_default ^ 1;
669    
670    /* Initialize no required char, and count of literals */
671    
672    *reqchar = prevreqchar = -1;
673    *countlits = 0;
674    
675  /* Switch on next character until the end of the branch */  /* Switch on next character until the end of the branch */
676    
677  for (;; ptr++)  for (;; ptr++)
678    {    {
679    BOOL negate_class;    BOOL negate_class;
680    int  class_charcount;    int class_charcount;
681    int  class_lastchar;    int class_lastchar;
682      int newoptions;
683      int condref;
684      int subreqchar;
685    
686    c = *ptr;    c = *ptr;
687    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
688      {      {
689      if ((pcre_ctypes[c] & ctype_space) != 0) continue;      if ((cd->ctypes[c] & ctype_space) != 0) continue;
690      if (c == '#')      if (c == '#')
691        {        {
692        while ((c = *(++ptr)) != 0 && c != '\n');        while ((c = *(++ptr)) != 0 && c != '\n');
# Line 679  for (;; ptr++) Line 729  for (;; ptr++)
729    
730      case '[':      case '[':
731      previous = code;      previous = code;
732        *code++ = OP_CLASS;
733    
734      /* If the first character is '^', set the negation flag, and use a      /* If the first character is '^', set the negation flag and skip it. */
     different opcode. This only matters if caseless matching is specified at  
     runtime. */  
735    
736      if ((c = *(++ptr)) == '^')      if ((c = *(++ptr)) == '^')
737        {        {
738        negate_class = TRUE;        negate_class = TRUE;
       *code++ = OP_NEGCLASS;  
739        c = *(++ptr);        c = *(++ptr);
740        }        }
741      else      else negate_class = FALSE;
       {  
       negate_class = FALSE;  
       *code++ = OP_CLASS;  
       }  
742    
743      /* Keep a count of chars so that we can optimize the case of just a single      /* Keep a count of chars so that we can optimize the case of just a single
744      character. */      character. */
# Line 730  for (;; ptr++) Line 774  for (;; ptr++)
774    
775        if (c == '\\')        if (c == '\\')
776          {          {
777          c = check_escape(&ptr, errorptr, *brackets, options, TRUE);          c = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
778          if (-c == ESC_b) c = '\b';          if (-c == ESC_b) c = '\b';
779          else if (c < 0)          else if (c < 0)
780            {            {
781              register const uschar *cbits = cd->cbits;
782            class_charcount = 10;            class_charcount = 10;
783            switch (-c)            switch (-c)
784              {              {
785              case ESC_d:              case ESC_d:
786              for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_digit];              for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_digit];
787              continue;              continue;
788    
789              case ESC_D:              case ESC_D:
790              for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_digit];              for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_digit];
791              continue;              continue;
792    
793              case ESC_w:              case ESC_w:
794              for (c = 0; c < 32; c++)              for (c = 0; c < 32; c++)
795                class[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]);                class[c] |= (cbits[c+cbit_digit] | cbits[c+cbit_word]);
796              continue;              continue;
797    
798              case ESC_W:              case ESC_W:
799              for (c = 0; c < 32; c++)              for (c = 0; c < 32; c++)
800                class[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]);                class[c] |= ~(cbits[c+cbit_digit] | cbits[c+cbit_word]);
801              continue;              continue;
802    
803              case ESC_s:              case ESC_s:
804              for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_space];              for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_space];
805              continue;              continue;
806    
807              case ESC_S:              case ESC_S:
808              for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_space];              for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_space];
809              continue;              continue;
810    
811              default:              default:
# Line 792  for (;; ptr++) Line 837  for (;; ptr++)
837    
838          if (d == '\\')          if (d == '\\')
839            {            {
840            d = check_escape(&ptr, errorptr, *brackets, options, TRUE);            d = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
841            if (d < 0)            if (d < 0)
842              {              {
843              if (d == -ESC_b) d = '\b'; else              if (d == -ESC_b) d = '\b'; else
# Line 814  for (;; ptr++) Line 859  for (;; ptr++)
859            class[c/8] |= (1 << (c&7));            class[c/8] |= (1 << (c&7));
860            if ((options & PCRE_CASELESS) != 0)            if ((options & PCRE_CASELESS) != 0)
861              {              {
862              int uc = pcre_fcc[c];           /* flip case */              int uc = cd->fcc[c];           /* flip case */
863              class[uc/8] |= (1 << (uc&7));              class[uc/8] |= (1 << (uc&7));
864              }              }
865            class_charcount++;                /* in case a one-char range */            class_charcount++;                /* in case a one-char range */
# Line 829  for (;; ptr++) Line 874  for (;; ptr++)
874        class [c/8] |= (1 << (c&7));        class [c/8] |= (1 << (c&7));
875        if ((options & PCRE_CASELESS) != 0)        if ((options & PCRE_CASELESS) != 0)
876          {          {
877          c = pcre_fcc[c];   /* flip case */          c = cd->fcc[c];   /* flip case */
878          class[c/8] |= (1 << (c&7));          class[c/8] |= (1 << (c&7));
879          }          }
880        class_charcount++;        class_charcount++;
# Line 876  for (;; ptr++) Line 921  for (;; ptr++)
921      /* Various kinds of repeat */      /* Various kinds of repeat */
922    
923      case '{':      case '{':
924      if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;      if (!is_counted_repeat(ptr+1, cd)) goto NORMAL_CHAR;
925      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr);      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr, cd);
926      if (*errorptr != NULL) goto FAILED;      if (*errorptr != NULL) goto FAILED;
927      goto REPEAT;      goto REPEAT;
928    
# Line 902  for (;; ptr++) Line 947  for (;; ptr++)
947        goto FAILED;        goto FAILED;
948        }        }
949    
950      /* If the next character is '?' this is a minimizing repeat. Advance to the      /* If the next character is '?' this is a minimizing repeat, by default,
951        but if PCRE_UNGREEDY is set, it works the other way round. Advance to the
952      next character. */      next character. */
953    
954      if (ptr[1] == '?') { repeat_type = 1; ptr++; } else repeat_type = 0;      if (ptr[1] == '?')
955          { repeat_type = greedy_non_default; ptr++; }
956      /* If the maximum is zero then the minimum must also be zero; Perl allows      else repeat_type = greedy_default;
     this case, so we do too - by simply omitting the item altogether. */  
   
     if (repeat_max == 0) code = previous;  
957    
958      /* If previous was a string of characters, chop off the last one and use it      /* If previous was a string of characters, chop off the last one and use it
959      as the subject of the repeat. If there was only one character, we can      as the subject of the repeat. If there was only one character, we can
960      abolish the previous item altogether. */      abolish the previous item altogether. A repeat with a zero minimum wipes
961        out any reqchar setting, backing up to the previous value. We must also
962        adjust the countlits value. */
963    
964      else if (*previous == OP_CHARS)      if (*previous == OP_CHARS)
965        {        {
966        int len = previous[1];        int len = previous[1];
967    
968          if (repeat_min == 0) *reqchar = prevreqchar;
969          *countlits += repeat_min - 1;
970    
971        if (len == 1)        if (len == 1)
972          {          {
973          c = previous[2];          c = previous[2];
# Line 950  for (;; ptr++) Line 999  for (;; ptr++)
999      create a suitable repeat item. The code is shared with single-character      create a suitable repeat item. The code is shared with single-character
1000      repeats by adding a suitable offset into repeat_type. */      repeats by adding a suitable offset into repeat_type. */
1001    
1002      else if ((int)*previous < OP_EOD || *previous == OP_ANY)      else if ((int)*previous < OP_EODN || *previous == OP_ANY)
1003        {        {
1004        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */
1005        c = *previous;        c = *previous;
1006        code = previous;        code = previous;
1007    
1008        OUTPUT_SINGLE_REPEAT:        OUTPUT_SINGLE_REPEAT:
1009        repeat_type += op_type;      /* Combine both values for many cases */  
1010          /* If the maximum is zero then the minimum must also be zero; Perl allows
1011          this case, so we do too - by simply omitting the item altogether. */
1012    
1013          if (repeat_max == 0) goto END_REPEAT;
1014    
1015          /* Combine the op_type with the repeat_type */
1016    
1017          repeat_type += op_type;
1018    
1019        /* A minimum of zero is handled either as the special case * or ?, or as        /* A minimum of zero is handled either as the special case * or ?, or as
1020        an UPTO, with the maximum given. */        an UPTO, with the maximum given. */
# Line 994  for (;; ptr++) Line 1051  for (;; ptr++)
1051          /* If the minimum is 1 and the previous item was a character string,          /* If the minimum is 1 and the previous item was a character string,
1052          we either have to put back the item that got cancelled if the string          we either have to put back the item that got cancelled if the string
1053          length was 1, or add the character back onto the end of a longer          length was 1, or add the character back onto the end of a longer
1054          string. For a character type nothing need be done; it will just get put          string. For a character type nothing need be done; it will just get
1055          back naturally. */          put back naturally. Note that the final character is always going to
1056            get added below. */
1057    
1058          else if (*previous == OP_CHARS)          else if (*previous == OP_CHARS)
1059            {            {
1060            if (code == previous) code += 2; else previous[1]++;            if (code == previous) code += 2; else previous[1]++;
1061            }            }
1062    
1063            /*  For a single negated character we also have to put back the
1064            item that got cancelled. */
1065    
1066            else if (*previous == OP_NOT) code++;
1067    
1068          /* If the maximum is unlimited, insert an OP_STAR. */          /* If the maximum is unlimited, insert an OP_STAR. */
1069    
1070          if (repeat_max < 0)          if (repeat_max < 0)
# Line 1028  for (;; ptr++) Line 1091  for (;; ptr++)
1091        }        }
1092    
1093      /* If previous was a character class or a back reference, we put the repeat      /* If previous was a character class or a back reference, we put the repeat
1094      stuff after it. */      stuff after it, but just skip the item if the repeat was {0,0}. */
1095    
1096      else if (*previous == OP_CLASS || *previous == OP_NEGCLASS ||      else if (*previous == OP_CLASS || *previous == OP_REF)
              *previous == OP_REF)  
1097        {        {
1098          if (repeat_max == 0)
1099            {
1100            code = previous;
1101            goto END_REPEAT;
1102            }
1103        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
1104          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
1105        else if (repeat_min == 1 && repeat_max == -1)        else if (repeat_min == 1 && repeat_max == -1)
# Line 1051  for (;; ptr++) Line 1118  for (;; ptr++)
1118        }        }
1119    
1120      /* If previous was a bracket group, we may have to replicate it in certain      /* If previous was a bracket group, we may have to replicate it in certain
1121      cases. If the maximum repeat count is unlimited, check that the bracket      cases. */
     group cannot match the empty string, and diagnose an error if it can. */  
1122    
1123      else if ((int)*previous >= OP_BRA)      else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE ||
1124                 (int)*previous == OP_COND)
1125        {        {
1126        int i;        register int i;
1127          int ketoffset = 0;
1128        int len = code - previous;        int len = code - previous;
1129          uschar *bralink = NULL;
1130    
1131        if (repeat_max == -1 && could_be_empty(previous))        /* If the maximum repeat count is unlimited, find the end of the bracket
1132          by scanning through from the start, and compute the offset back to it
1133          from the current code pointer. There may be an OP_OPT setting following
1134          the final KET, so we can't find the end just by going back from the code
1135          pointer. */
1136    
1137          if (repeat_max == -1)
1138            {
1139            register uschar *ket = previous;
1140            do ket += (ket[1] << 8) + ket[2]; while (*ket != OP_KET);
1141            ketoffset = code - ket;
1142            }
1143    
1144          /* The case of a zero minimum is special because of the need to stick
1145          OP_BRAZERO in front of it, and because the group appears once in the
1146          data, whereas in other cases it appears the minimum number of times. For
1147          this reason, it is simplest to treat this case separately, as otherwise
1148          the code gets far too messy. There are several special subcases when the
1149          minimum is zero. */
1150    
1151          if (repeat_min == 0)
1152          {          {
1153          *errorptr = ERR10;          /* If we set up a required char from the bracket, we must back off
1154          goto FAILED;          to the previous value and reset the countlits value too. */
1155          }  
1156            if (subcountlits > 0)
1157              {
1158              *reqchar = prevreqchar;
1159              *countlits -= subcountlits;
1160              }
1161    
1162        /* If the minimum is greater than zero, and the maximum is unlimited or          /* If the maximum is also zero, we just omit the group from the output
1163        equal to the minimum, the first copy remains where it is, and is          altogether. */
       replicated up to the minimum number of times. This case includes the +  
       repeat, but of course no replication is needed in that case. */  
1164    
1165        if (repeat_min > 0 && (repeat_max == -1 || repeat_max == repeat_min))          if (repeat_max == 0)
         {  
         for (i = 1; i < repeat_min; i++)  
1166            {            {
1167            memcpy(code, previous, len);            code = previous;
1168            code += len;            goto END_REPEAT;
1169            }            }
         }  
1170    
1171        /* If the minimum is zero, stick BRAZERO in front of the first copy.          /* If the maximum is 1 or unlimited, we just have to stick in the
1172        Then, if there is a fixed upper limit, replicated up to that many times,          BRAZERO and do no more at this point. */
       sticking BRAZERO in front of all the optional ones. */  
1173    
1174        else          if (repeat_max <= 1)
         {  
         if (repeat_min == 0)  
1175            {            {
1176            memmove(previous+1, previous, len);            memmove(previous+1, previous, len);
1177            code++;            code++;
1178            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
1179            }            }
1180    
1181            /* If the maximum is greater than 1 and limited, we have to replicate
1182            in a nested fashion, sticking OP_BRAZERO before each set of brackets.
1183            The first one has to be handled carefully because it's the original
1184            copy, which has to be moved up. The remainder can be handled by code
1185            that is common with the non-zero minimum case below. We just have to
1186            adjust the value of repeat_max, since one less copy is required. */
1187    
1188            else
1189              {
1190              int offset;
1191              memmove(previous+4, previous, len);
1192              code += 4;
1193              *previous++ = OP_BRAZERO + repeat_type;
1194              *previous++ = OP_BRA;
1195    
1196              /* We chain together the bracket offset fields that have to be
1197              filled in later when the ends of the brackets are reached. */
1198    
1199              offset = (bralink == NULL)? 0 : previous - bralink;
1200              bralink = previous;
1201              *previous++ = offset >> 8;
1202              *previous++ = offset & 255;
1203              }
1204    
1205            repeat_max--;
1206            }
1207    
1208          /* If the minimum is greater than zero, replicate the group as many
1209          times as necessary, and adjust the maximum to the number of subsequent
1210          copies that we need. */
1211    
1212          else
1213            {
1214          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1215            {            {
1216            memcpy(code, previous, len);            memcpy(code, previous, len);
1217            code += len;            code += len;
1218            }            }
1219            if (repeat_max > 0) repeat_max -= repeat_min;
1220            }
1221    
1222          /* This code is common to both the zero and non-zero minimum cases. If
1223          the maximum is limited, it replicates the group in a nested fashion,
1224          remembering the bracket starts on a stack. In the case of a zero minimum,
1225          the first one was set up above. In all cases the repeat_max now specifies
1226          the number of additional copies needed. */
1227    
1228          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)        if (repeat_max >= 0)
1229            {
1230            for (i = repeat_max - 1; i >= 0; i--)
1231            {            {
1232            *code++ = OP_BRAZERO + repeat_type;            *code++ = OP_BRAZERO + repeat_type;
1233    
1234              /* All but the final copy start a new nesting, maintaining the
1235              chain of brackets outstanding. */
1236    
1237              if (i != 0)
1238                {
1239                int offset;
1240                *code++ = OP_BRA;
1241                offset = (bralink == NULL)? 0 : code - bralink;
1242                bralink = code;
1243                *code++ = offset >> 8;
1244                *code++ = offset & 255;
1245                }
1246    
1247            memcpy(code, previous, len);            memcpy(code, previous, len);
1248            code += len;            code += len;
1249            }            }
1250    
1251            /* Now chain through the pending brackets, and fill in their length
1252            fields (which are holding the chain links pro tem). */
1253    
1254            while (bralink != NULL)
1255              {
1256              int oldlinkoffset;
1257              int offset = code - bralink + 1;
1258              uschar *bra = code - offset;
1259              oldlinkoffset = (bra[1] << 8) + bra[2];
1260              bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
1261              *code++ = OP_KET;
1262              *code++ = bra[1] = offset >> 8;
1263              *code++ = bra[2] = (offset & 255);
1264              }
1265          }          }
1266    
1267        /* If the maximum is unlimited, set a repeater in the final copy. */        /* If the maximum is unlimited, set a repeater in the final copy. We
1268          can't just offset backwards from the current code point, because we
1269          don't know if there's been an options resetting after the ket. The
1270          correct offset was computed above. */
1271    
1272        if (repeat_max == -1) code[-3] = OP_KETRMAX + repeat_type;        else code[-ketoffset] = OP_KETRMAX + repeat_type;
1273        }        }
1274    
1275      /* Else there's some kind of shambles */      /* Else there's some kind of shambles */
# Line 1121  for (;; ptr++) Line 1282  for (;; ptr++)
1282    
1283      /* In all cases we no longer have a previous item. */      /* In all cases we no longer have a previous item. */
1284    
1285        END_REPEAT:
1286      previous = NULL;      previous = NULL;
1287      break;      break;
1288    
1289    
1290      /* Start of nested bracket sub-expression, or comment or lookahead.      /* Start of nested bracket sub-expression, or comment or lookahead or
1291      First deal with special things that can come after a bracket; all are      lookbehind or option setting or condition. First deal with special things
1292      introduced by ?, and the appearance of any of them means that this is not a      that can come after a bracket; all are introduced by ?, and the appearance
1293      referencing group. They were checked for validity in the first pass over      of any of them means that this is not a referencing group. They were
1294      the string, so we don't have to check for syntax errors here.  */      checked for validity in the first pass over the string, so we don't have to
1295        check for syntax errors here.  */
1296    
1297      case '(':      case '(':
1298      previous = code;              /* Only real brackets can be repeated */      newoptions = options;
1299        condref = -1;
1300    
1301      if (*(++ptr) == '?')      if (*(++ptr) == '?')
1302        {        {
1303        bravalue = OP_BRA;        int set, unset;
1304          int *optset;
1305    
1306        switch (*(++ptr))        switch (*(++ptr))
1307          {          {
1308          case '#':          case '#':                 /* Comment; skip to ket */
         case 'i':  
         case 'm':  
         case 's':  
         case 'x':  
1309          ptr++;          ptr++;
1310          while (*ptr != ')') ptr++;          while (*ptr != ')') ptr++;
         previous = NULL;  
1311          continue;          continue;
1312    
1313          case ':':                 /* Non-extracting bracket */          case ':':                 /* Non-extracting bracket */
1314            bravalue = OP_BRA;
1315          ptr++;          ptr++;
1316          break;          break;
1317    
1318          case '=':                 /* Assertions can't be repeated */          case '(':
1319            bravalue = OP_COND;       /* Conditional group */
1320            if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0)
1321              {
1322              condref = *ptr - '0';
1323              while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';
1324              ptr++;
1325              }
1326            else ptr--;
1327            break;
1328    
1329            case '=':                 /* Positive lookahead */
1330          bravalue = OP_ASSERT;          bravalue = OP_ASSERT;
1331          ptr++;          ptr++;
         previous = NULL;  
1332          break;          break;
1333    
1334          case '!':          case '!':                 /* Negative lookahead */
1335          bravalue = OP_ASSERT_NOT;          bravalue = OP_ASSERT_NOT;
1336          ptr++;          ptr++;
         previous = NULL;  
1337          break;          break;
1338    
1339          case '>':                         /* "Match once" brackets */          case '<':                 /* Lookbehinds */
1340          if ((options & PCRE_EXTRA) != 0)  /* Not yet standard */          switch (*(++ptr))
1341            {            {
1342            bravalue = OP_ONCE;            case '=':               /* Positive lookbehind */
1343              bravalue = OP_ASSERTBACK;
1344              ptr++;
1345              break;
1346    
1347              case '!':               /* Negative lookbehind */
1348              bravalue = OP_ASSERTBACK_NOT;
1349            ptr++;            ptr++;
           previous = NULL;  
1350            break;            break;
1351    
1352              default:                /* Syntax error */
1353              *errorptr = ERR24;
1354              goto FAILED;
1355            }            }
1356          /* Else fall through */          break;
1357    
1358          default:          case '>':                 /* One-time brackets */
1359          *errorptr = ERR12;          bravalue = OP_ONCE;
1360          goto FAILED;          ptr++;
1361            break;
1362    
1363            default:                  /* Option setting */
1364            set = unset = 0;
1365            optset = &set;
1366    
1367            while (*ptr != ')' && *ptr != ':')
1368              {
1369              switch (*ptr++)
1370                {
1371                case '-': optset = &unset; break;
1372    
1373                case 'i': *optset |= PCRE_CASELESS; break;
1374                case 'm': *optset |= PCRE_MULTILINE; break;
1375                case 's': *optset |= PCRE_DOTALL; break;
1376                case 'x': *optset |= PCRE_EXTENDED; break;
1377                case 'U': *optset |= PCRE_UNGREEDY; break;
1378                case 'X': *optset |= PCRE_EXTRA; break;
1379    
1380                default:
1381                *errorptr = ERR12;
1382                goto FAILED;
1383                }
1384              }
1385    
1386            /* Set up the changed option bits, but don't change anything yet. */
1387    
1388            newoptions = (options | set) & (~unset);
1389    
1390            /* If the options ended with ')' this is not the start of a nested
1391            group with option changes, so the options change at this level. At top
1392            level there is nothing else to be done (the options will in fact have
1393            been set from the start of compiling as a result of the first pass) but
1394            at an inner level we must compile code to change the ims options if
1395            necessary, and pass the new setting back so that it can be put at the
1396            start of any following branches, and when this group ends, a resetting
1397            item can be compiled. */
1398    
1399            if (*ptr == ')')
1400              {
1401              if ((options & PCRE_INGROUP) != 0 &&
1402                  (options & PCRE_IMS) != (newoptions & PCRE_IMS))
1403                {
1404                *code++ = OP_OPT;
1405                *code++ = *optchanged = newoptions & PCRE_IMS;
1406                }
1407              options = newoptions;  /* Change options at this level */
1408              previous = NULL;       /* This item can't be repeated */
1409              continue;              /* It is complete */
1410              }
1411    
1412            /* If the options ended with ':' we are heading into a nested group
1413            with possible change of options. Such groups are non-capturing and are
1414            not assertions of any kind. All we need to do is skip over the ':';
1415            the newoptions value is handled below. */
1416    
1417            bravalue = OP_BRA;
1418            ptr++;
1419          }          }
1420        }        }
1421    
1422      /* Else we have a referencing group */      /* Else we have a referencing group; adjust the opcode. */
1423    
1424      else      else
1425        {        {
# Line 1193  for (;; ptr++) Line 1431  for (;; ptr++)
1431        bravalue = OP_BRA + *brackets;        bravalue = OP_BRA + *brackets;
1432        }        }
1433    
1434      /* Process nested bracketed re; at end pointer is on the bracket. We copy      /* Process nested bracketed re. Assertions may not be repeated, but other
1435      code into a non-register variable in order to be able to pass its address      kinds can be. We copy code into a non-register variable in order to be able
1436      because some compilers complain otherwise. */      to pass its address because some compilers complain otherwise. Pass in a
1437        new setting for the ims options if they have changed. */
1438    
1439        previous = (bravalue >= OP_ONCE)? code : NULL;
1440      *code = bravalue;      *code = bravalue;
1441        tempcode = code;
1442    
1443        if (!compile_regex(
1444             options | PCRE_INGROUP,       /* Set for all nested groups */
1445             ((options & PCRE_IMS) != (newoptions & PCRE_IMS))?
1446               newoptions & PCRE_IMS : -1, /* Pass ims options if changed */
1447             brackets,                     /* Bracket level */
1448             &tempcode,                    /* Where to put code (updated) */
1449             &ptr,                         /* Input pointer (updated) */
1450             errorptr,                     /* Where to put an error message */
1451             (bravalue == OP_ASSERTBACK ||
1452              bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
1453             condref,                      /* Condition reference number */
1454             &subreqchar,                  /* For possible last char */
1455             &subcountlits,                /* For literal count */
1456             cd))                          /* Tables block */
1457          goto FAILED;
1458    
1459        /* At the end of compiling, code is still pointing to the start of the
1460        group, while tempcode has been updated to point past the end of the group
1461        and any option resetting that may follow it. The pattern pointer (ptr)
1462        is on the bracket. */
1463    
1464        /* If this is a conditional bracket, check that there are no more than
1465        two branches in the group. */
1466    
1467        if (bravalue == OP_COND)
1468        {        {
1469        uschar *mcode = code;        uschar *tc = code;
1470        if (!compile_regex(options, brackets, &mcode, &ptr, errorptr))        condcount = 0;
1471    
1472          do {
1473             condcount++;
1474             tc += (tc[1] << 8) | tc[2];
1475             }
1476          while (*tc != OP_KET);
1477    
1478          if (condcount > 2)
1479            {
1480            *errorptr = ERR27;
1481          goto FAILED;          goto FAILED;
1482        code = mcode;          }
1483          }
1484    
1485        /* Handle updating of the required character. If the subpattern didn't
1486        set one, leave it as it was. Otherwise, update it for normal brackets of
1487        all kinds, forward assertions, and conditions with two branches. Don't
1488        update the literal count for forward assertions, however. If the bracket
1489        is followed by a quantifier with zero repeat, we have to back off. Hence
1490        the definition of prevreqchar and subcountlits outside the main loop so
1491        that they can be accessed for the back off. */
1492    
1493        if (subreqchar > 0 &&
1494             (bravalue >= OP_BRA || bravalue == OP_ONCE || bravalue == OP_ASSERT ||
1495             (bravalue == OP_COND && condcount == 2)))
1496          {
1497          prevreqchar = *reqchar;
1498          *reqchar = subreqchar;
1499          if (bravalue != OP_ASSERT) *countlits += subcountlits;
1500        }        }
1501    
1502        /* Now update the main code pointer to the end of the group. */
1503    
1504        code = tempcode;
1505    
1506        /* Error if hit end of pattern */
1507    
1508      if (*ptr != ')')      if (*ptr != ')')
1509        {        {
1510        *errorptr = ERR14;        *errorptr = ERR14;
# Line 1217  for (;; ptr++) Line 1517  for (;; ptr++)
1517      for validity in the pre-compiling pass. */      for validity in the pre-compiling pass. */
1518    
1519      case '\\':      case '\\':
1520      oldptr = ptr;      tempptr = ptr;
1521      c = check_escape(&ptr, errorptr, *brackets, options, FALSE);      c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1522    
1523      /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values      /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values
1524      are arranged to be the negation of the corresponding OP_values. For the      are arranged to be the negation of the corresponding OP_values. For the
# Line 1231  for (;; ptr++) Line 1531  for (;; ptr++)
1531        {        {
1532        if (-c >= ESC_REF)        if (-c >= ESC_REF)
1533          {          {
         int refnum = -c - ESC_REF;  
         if (*brackets < refnum)  
           {  
           *errorptr = ERR15;  
           goto FAILED;  
           }  
1534          previous = code;          previous = code;
1535          *code++ = OP_REF;          *code++ = OP_REF;
1536          *code++ = refnum;          *code++ = -c - ESC_REF;
1537          }          }
1538        else        else
1539          {          {
1540          previous = (-c > ESC_b && -c < ESC_X)? code : NULL;          previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
1541          *code++ = -c;          *code++ = -c;
1542          }          }
1543        continue;        continue;
# Line 1251  for (;; ptr++) Line 1545  for (;; ptr++)
1545    
1546      /* Data character: reset and fall through */      /* Data character: reset and fall through */
1547    
1548      ptr = oldptr;      ptr = tempptr;
1549      c = '\\';      c = '\\';
1550    
1551      /* Handle a run of data characters until a metacharacter is encountered.      /* Handle a run of data characters until a metacharacter is encountered.
# Line 1269  for (;; ptr++) Line 1563  for (;; ptr++)
1563        {        {
1564        if ((options & PCRE_EXTENDED) != 0)        if ((options & PCRE_EXTENDED) != 0)
1565          {          {
1566          if ((pcre_ctypes[c] & ctype_space) != 0) continue;          if ((cd->ctypes[c] & ctype_space) != 0) continue;
1567          if (c == '#')          if (c == '#')
1568            {            {
1569            while ((c = *(++ptr)) != 0 && c != '\n');            while ((c = *(++ptr)) != 0 && c != '\n');
# Line 1284  for (;; ptr++) Line 1578  for (;; ptr++)
1578    
1579        if (c == '\\')        if (c == '\\')
1580          {          {
1581          oldptr = ptr;          tempptr = ptr;
1582          c = check_escape(&ptr, errorptr, *brackets, options, FALSE);          c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1583          if (c < 0) { ptr = oldptr; break; }          if (c < 0) { ptr = tempptr; break; }
1584          }          }
1585    
1586        /* Ordinary character or single-char escape */        /* Ordinary character or single-char escape */
# Line 1297  for (;; ptr++) Line 1591  for (;; ptr++)
1591    
1592      /* This "while" is the end of the "do" above. */      /* This "while" is the end of the "do" above. */
1593    
1594      while (length < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);      while (length < 255 && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0);
1595    
1596        /* Update the last character and the count of literals */
1597    
1598        prevreqchar = (length > 1)? code[-2] : *reqchar;
1599        *reqchar = code[-1];
1600        *countlits += length;
1601    
1602      /* Compute the length and set it in the data vector, and advance to      /* Compute the length and set it in the data vector, and advance to
1603      the next state. */      the next state. */
# Line 1327  return FALSE; Line 1627  return FALSE;
1627  /* On entry, ptr is pointing past the bracket character, but on return  /* On entry, ptr is pointing past the bracket character, but on return
1628  it points to the closing bracket, or vertical bar, or end of string.  it points to the closing bracket, or vertical bar, or end of string.
1629  The code variable is pointing at the byte into which the BRA operator has been  The code variable is pointing at the byte into which the BRA operator has been
1630  stored.  stored. If the ims options are changed at the start (for a (?ims: group) or
1631    during any branch, we need to insert an OP_OPT item at the start of every
1632    following branch to ensure they get set correctly at run time, and also pass
1633    the new options into every subsequent branch compile.
1634    
1635  Arguments:  Arguments:
1636    options   the option bits    options     the option bits
1637    brackets  -> int containing the number of extracting brackets used    optchanged  new ims options to set as if (?ims) were at the start, or -1
1638    codeptr   -> the address of the current code pointer                 for no change
1639    ptrptr    -> the address of the current pattern pointer    brackets    -> int containing the number of extracting brackets used
1640    errorptr  -> pointer to error message    codeptr     -> the address of the current code pointer
1641      ptrptr      -> the address of the current pattern pointer
1642      errorptr    -> pointer to error message
1643      lookbehind  TRUE if this is a lookbehind assertion
1644      condref     > 0 for OP_CREF setting at start of conditional group
1645      reqchar     -> place to put the last required character, or a negative number
1646      countlits   -> place to put the shortest literal count of any branch
1647      cd          points to the data block with tables pointers
1648    
1649  Returns:    TRUE on success  Returns:      TRUE on success
1650  */  */
1651    
1652  static BOOL  static BOOL
1653  compile_regex(int options, int *brackets, uschar **codeptr,  compile_regex(int options, int optchanged, int *brackets, uschar **codeptr,
1654    const uschar **ptrptr, const char **errorptr)    const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref,
1655      int *reqchar, int *countlits, compile_data *cd)
1656  {  {
1657  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
1658  uschar *code = *codeptr;  uschar *code = *codeptr;
1659    uschar *last_branch = code;
1660  uschar *start_bracket = code;  uschar *start_bracket = code;
1661    uschar *reverse_count = NULL;
1662    int oldoptions = options & PCRE_IMS;
1663    int branchreqchar, branchcountlits;
1664    
1665    *reqchar = -1;
1666    *countlits = INT_MAX;
1667    code += 3;
1668    
1669    /* At the start of a reference-based conditional group, insert the reference
1670    number as an OP_CREF item. */
1671    
1672    if (condref > 0)
1673      {
1674      *code++ = OP_CREF;
1675      *code++ = condref;
1676      }
1677    
1678    /* Loop for each alternative branch */
1679    
1680  for (;;)  for (;;)
1681    {    {
1682    int length;    int length;
   uschar *last_branch = code;  
1683    
1684    code += 3;    /* Handle change of options */
1685    if (!compile_branch(options, brackets, &code, &ptr, errorptr))  
1686      if (optchanged >= 0)
1687        {
1688        *code++ = OP_OPT;
1689        *code++ = optchanged;
1690        options = (options & ~PCRE_IMS) | optchanged;
1691        }
1692    
1693      /* Set up dummy OP_REVERSE if lookbehind assertion */
1694    
1695      if (lookbehind)
1696        {
1697        *code++ = OP_REVERSE;
1698        reverse_count = code;
1699        *code++ = 0;
1700        *code++ = 0;
1701        }
1702    
1703      /* Now compile the branch */
1704    
1705      if (!compile_branch(options, brackets, &code, &ptr, errorptr, &optchanged,
1706          &branchreqchar, &branchcountlits, cd))
1707      {      {
1708      *ptrptr = ptr;      *ptrptr = ptr;
1709      return FALSE;      return FALSE;
# Line 1365  for (;;) Line 1715  for (;;)
1715    last_branch[1] = length >> 8;    last_branch[1] = length >> 8;
1716    last_branch[2] = length & 255;    last_branch[2] = length & 255;
1717    
1718      /* Save the last required character if all branches have the same; a current
1719      value of -1 means unset, while -2 means "previous branch had no last required
1720      char".  */
1721    
1722      if (*reqchar != -2)
1723        {
1724        if (branchreqchar >= 0)
1725          {
1726          if (*reqchar == -1) *reqchar = branchreqchar;
1727          else if (*reqchar != branchreqchar) *reqchar = -2;
1728          }
1729        else *reqchar = -2;
1730        }
1731    
1732      /* Keep the shortest literal count */
1733    
1734      if (branchcountlits < *countlits) *countlits = branchcountlits;
1735      DPRINTF(("literal count = %d min=%d\n", branchcountlits, *countlits));
1736    
1737      /* If lookbehind, check that this branch matches a fixed-length string,
1738      and put the length into the OP_REVERSE item. Temporarily mark the end of
1739      the branch with OP_END. */
1740    
1741      if (lookbehind)
1742        {
1743        *code = OP_END;
1744        length = find_fixedlength(last_branch);
1745        DPRINTF(("fixed length = %d\n", length));
1746        if (length < 0)
1747          {
1748          *errorptr = ERR25;
1749          *ptrptr = ptr;
1750          return FALSE;
1751          }
1752        reverse_count[0] = (length >> 8);
1753        reverse_count[1] = length & 255;
1754        }
1755    
1756    /* Reached end of expression, either ')' or end of pattern. Insert a    /* Reached end of expression, either ')' or end of pattern. Insert a
1757    terminating ket and the length of the whole bracketed item, and return,    terminating ket and the length of the whole bracketed item, and return,
1758    leaving the pointer at the terminating char. */    leaving the pointer at the terminating char. If any of the ims options
1759      were changed inside the group, compile a resetting op-code following. */
1760    
1761    if (*ptr != '|')    if (*ptr != '|')
1762      {      {
# Line 1375  for (;;) Line 1764  for (;;)
1764      *code++ = OP_KET;      *code++ = OP_KET;
1765      *code++ = length >> 8;      *code++ = length >> 8;
1766      *code++ = length & 255;      *code++ = length & 255;
1767        if (optchanged >= 0)
1768          {
1769          *code++ = OP_OPT;
1770          *code++ = oldoptions;
1771          }
1772      *codeptr = code;      *codeptr = code;
1773      *ptrptr = ptr;      *ptrptr = ptr;
1774      return TRUE;      return TRUE;
# Line 1383  for (;;) Line 1777  for (;;)
1777    /* Another branch follows; insert an "or" node and advance the pointer. */    /* Another branch follows; insert an "or" node and advance the pointer. */
1778    
1779    *code = OP_ALT;    *code = OP_ALT;
1780      last_branch = code;
1781      code += 3;
1782    ptr++;    ptr++;
1783    }    }
1784  /* Control never reaches here */  /* Control never reaches here */
# Line 1390  for (;;) Line 1786  for (;;)
1786    
1787    
1788    
1789    
1790    /*************************************************
1791    *      Find first significant op code            *
1792    *************************************************/
1793    
1794    /* This is called by several functions that scan a compiled expression looking
1795    for a fixed first character, or an anchoring op code etc. It skips over things
1796    that do not influence this. For one application, a change of caseless option is
1797    important.
1798    
1799    Arguments:
1800      code       pointer to the start of the group
1801      options    pointer to external options
1802      optbit     the option bit whose changing is significant, or
1803                 zero if none are
1804      optstop    TRUE to return on option change, otherwise change the options
1805                   value and continue
1806    
1807    Returns:     pointer to the first significant opcode
1808    */
1809    
1810    static const uschar*
1811    first_significant_code(const uschar *code, int *options, int optbit,
1812      BOOL optstop)
1813    {
1814    for (;;)
1815      {
1816      switch ((int)*code)
1817        {
1818        case OP_OPT:
1819        if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit))
1820          {
1821          if (optstop) return code;
1822          *options = (int)code[1];
1823          }
1824        code += 2;
1825        break;
1826    
1827        case OP_CREF:
1828        code += 2;
1829        break;
1830    
1831        case OP_WORD_BOUNDARY:
1832        case OP_NOT_WORD_BOUNDARY:
1833        code++;
1834        break;
1835    
1836        case OP_ASSERT_NOT:
1837        case OP_ASSERTBACK:
1838        case OP_ASSERTBACK_NOT:
1839        do code += (code[1] << 8) + code[2]; while (*code == OP_ALT);
1840        code += 3;
1841        break;
1842    
1843        default:
1844        return code;
1845        }
1846      }
1847    /* Control never reaches here */
1848    }
1849    
1850    
1851    
1852    
1853  /*************************************************  /*************************************************
1854  *          Check for anchored expression         *  *          Check for anchored expression         *
1855  *************************************************/  *************************************************/
# Line 1400  all of whose alternatives start with OP_ Line 1860  all of whose alternatives start with OP_
1860  it's anchored. However, if this is a multiline pattern, then only OP_SOD  it's anchored. However, if this is a multiline pattern, then only OP_SOD
1861  counts, since OP_CIRC can match in the middle.  counts, since OP_CIRC can match in the middle.
1862    
1863  A branch is also implicitly anchored if it starts with .* because that will try  A branch is also implicitly anchored if it starts with .* and DOTALL is set,
1864  the rest of the pattern at all possible matching points, so there is no point  because that will try the rest of the pattern at all possible matching points,
1865  trying them again.  so there is no point trying them again.
1866    
1867  Argument:  points to start of expression (the bracket)  Arguments:
1868  Returns:   TRUE or FALSE    code       points to start of expression (the bracket)
1869      options    points to the options setting
1870    
1871    Returns:     TRUE or FALSE
1872  */  */
1873    
1874  static BOOL  static BOOL
1875  is_anchored(register const uschar *code, BOOL multiline)  is_anchored(register const uschar *code, int *options)
1876  {  {
1877  do {  do {
1878     int op = (int)code[3];     const uschar *scode = first_significant_code(code + 3, options,
1879     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE)       PCRE_MULTILINE, FALSE);
1880       { if (!is_anchored(code+3, multiline)) return FALSE; }     register int op = *scode;
1881     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
1882       { if (code[4] != OP_ANY) return FALSE; }       { if (!is_anchored(scode, options)) return FALSE; }
1883     else if (op != OP_SOD && (multiline || op != OP_CIRC)) return FALSE;     else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) &&
1884                (*options & PCRE_DOTALL) != 0)
1885         { if (scode[1] != OP_ANY) return FALSE; }
1886       else if (op != OP_SOD &&
1887               ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))
1888         return FALSE;
1889     code += (code[1] << 8) + code[2];     code += (code[1] << 8) + code[2];
1890     }     }
1891  while (*code == OP_ALT);  while (*code == OP_ALT);
# Line 1427  return TRUE; Line 1895  return TRUE;
1895    
1896    
1897  /*************************************************  /*************************************************
1898  *     Check for start with \n line expression    *  *         Check for starting with ^ or .*        *
1899  *************************************************/  *************************************************/
1900    
1901  /* This is called for multiline expressions to try to find out if every branch  /* This is called to find out if every branch starts with ^ or .* so that
1902  starts with ^ so that "first char" processing can be done to speed things up.  "first char" processing can be done to speed things up in multiline
1903    matching and for non-DOTALL patterns that start with .* (which must start at
1904    the beginning or after \n).
1905    
1906  Argument:  points to start of expression (the bracket)  Argument:  points to start of expression (the bracket)
1907  Returns:   TRUE or FALSE  Returns:   TRUE or FALSE
# Line 1441  static BOOL Line 1911  static BOOL
1911  is_startline(const uschar *code)  is_startline(const uschar *code)
1912  {  {
1913  do {  do {
1914     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)     const uschar *scode = first_significant_code(code + 3, NULL, 0, FALSE);
1915       { if (!is_startline(code+3)) return FALSE; }     register int op = *scode;
1916     else if (code[3] != OP_CIRC) return FALSE;     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
1917         { if (!is_startline(scode)) return FALSE; }
1918       else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)
1919         { if (scode[1] != OP_ANY) return FALSE; }
1920       else if (op != OP_CIRC) return FALSE;
1921     code += (code[1] << 8) + code[2];     code += (code[1] << 8) + code[2];
1922     }     }
1923  while (*code == OP_ALT);  while (*code == OP_ALT);
# Line 1462  Consider each alternative branch. If the Line 1936  Consider each alternative branch. If the
1936  a bracket all of whose alternatives start with the same char (recurse ad lib),  a bracket all of whose alternatives start with the same char (recurse ad lib),
1937  then we return that char, otherwise -1.  then we return that char, otherwise -1.
1938    
1939  Argument:  points to start of expression (the bracket)  Arguments:
1940  Returns:   -1 or the fixed first char    code       points to start of expression (the bracket)
1941      options    pointer to the options (used to check casing changes)
1942    
1943    Returns:     -1 or the fixed first char
1944  */  */
1945    
1946  static int  static int
1947  find_firstchar(uschar *code)  find_firstchar(const uschar *code, int *options)
1948  {  {
1949  register int c = -1;  register int c = -1;
1950  do  do {
1951    {     int d;
1952    register int charoffset = 4;     const uschar *scode = first_significant_code(code + 3, options,
1953         PCRE_CASELESS, TRUE);
1954    if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)     register int op = *scode;
1955      {  
1956      register int d;     if (op >= OP_BRA) op = OP_BRA;
1957      if ((d = find_firstchar(code+3)) < 0) return -1;  
1958      if (c < 0) c = d; else if (c != d) return -1;     switch(op)
1959      }       {
1960         default:
1961    else switch(code[3])       return -1;
1962      {  
1963      default:       case OP_BRA:
1964      return -1;       case OP_ASSERT:
1965         case OP_ONCE:
1966      case OP_EXACT:       /* Fall through */       case OP_COND:
1967      charoffset++;       if ((d = find_firstchar(scode, options)) < 0) return -1;
1968         if (c < 0) c = d; else if (c != d) return -1;
1969      case OP_CHARS:       /* Fall through */       break;
1970      charoffset++;  
1971         case OP_EXACT:       /* Fall through */
1972         scode++;
1973    
1974         case OP_CHARS:       /* Fall through */
1975         scode++;
1976    
1977         case OP_PLUS:
1978         case OP_MINPLUS:
1979         if (c < 0) c = scode[1]; else if (c != scode[1]) return -1;
1980         break;
1981         }
1982    
1983      case OP_PLUS:     code += (code[1] << 8) + code[2];
1984      case OP_MINPLUS:     }
     if (c < 0) c = code[charoffset]; else if (c != code[charoffset]) return -1;  
     break;  
     }  
   code += (code[1] << 8) + code[2];  
   }  
1985  while (*code == OP_ALT);  while (*code == OP_ALT);
1986  return c;  return c;
1987  }  }
1988    
1989    
1990    
1991    
1992    
1993  /*************************************************  /*************************************************
1994  *        Compile a Regular Expression            *  *        Compile a Regular Expression            *
1995  *************************************************/  *************************************************/
# Line 1517  Arguments: Line 2002  Arguments:
2002    options      various option bits    options      various option bits
2003    errorptr     pointer to pointer to error text    errorptr     pointer to pointer to error text
2004    erroroffset  ptr offset in pattern where error was detected    erroroffset  ptr offset in pattern where error was detected
2005      tables       pointer to character tables or NULL
2006    
2007  Returns:       pointer to compiled data block, or NULL on error,  Returns:       pointer to compiled data block, or NULL on error,
2008                 with errorptr and erroroffset set                 with errorptr and erroroffset set
# Line 1524  Returns:       pointer to compiled data Line 2010  Returns:       pointer to compiled data
2010    
2011  pcre *  pcre *
2012  pcre_compile(const char *pattern, int options, const char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
2013    int *erroroffset)    int *erroroffset, const unsigned char *tables)
2014  {  {
2015  real_pcre *re;  real_pcre *re;
 int spaces = 0;  
2016  int length = 3;      /* For initial BRA plus length */  int length = 3;      /* For initial BRA plus length */
2017  int runlength;  int runlength;
2018  int c, size;  int c, size, reqchar, countlits;
2019  int bracount = 0;  int bracount = 0;
 int brastack[200];  
2020  int top_backref = 0;  int top_backref = 0;
2021    int branch_extra = 0;
2022    int branch_newextra;
2023  unsigned int brastackptr = 0;  unsigned int brastackptr = 0;
2024  uschar *code;  uschar *code;
2025  const uschar *ptr;  const uschar *ptr;
2026    compile_data compile_block;
2027    int brastack[BRASTACK_SIZE];
2028    uschar bralenstack[BRASTACK_SIZE];
2029    
2030  #ifdef DEBUG  #ifdef DEBUG
2031  uschar *code_base, *code_end;  uschar *code_base, *code_end;
# Line 1563  if ((options & ~PUBLIC_OPTIONS) != 0) Line 2052  if ((options & ~PUBLIC_OPTIONS) != 0)
2052    return NULL;    return NULL;
2053    }    }
2054    
2055    /* Set up pointers to the individual character tables */
2056    
2057    if (tables == NULL) tables = pcre_default_tables;
2058    compile_block.lcc = tables + lcc_offset;
2059    compile_block.fcc = tables + fcc_offset;
2060    compile_block.cbits = tables + cbits_offset;
2061    compile_block.ctypes = tables + ctypes_offset;
2062    
2063    /* Reflect pattern for debugging output */
2064    
2065  DPRINTF(("------------------------------------------------------------------\n"));  DPRINTF(("------------------------------------------------------------------\n"));
2066  DPRINTF(("%s\n", pattern));  DPRINTF(("%s\n", pattern));
2067    
# Line 1579  while ((c = *(++ptr)) != 0) Line 2078  while ((c = *(++ptr)) != 0)
2078    int min, max;    int min, max;
2079    int class_charcount;    int class_charcount;
2080    
2081    if ((pcre_ctypes[c] & ctype_space) != 0)    if ((options & PCRE_EXTENDED) != 0)
     {  
     if ((options & PCRE_EXTENDED) != 0) continue;  
     spaces++;  
     }  
   
   if (c == '#' && (options & PCRE_EXTENDED) != 0)  
2082      {      {
2083      while ((c = *(++ptr)) != 0 && c != '\n');      if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2084      continue;      if (c == '#')
2085          {
2086          while ((c = *(++ptr)) != 0 && c != '\n');
2087          continue;
2088          }
2089      }      }
2090    
2091    switch(c)    switch(c)
# Line 1601  while ((c = *(++ptr)) != 0) Line 2098  while ((c = *(++ptr)) != 0)
2098      case '\\':      case '\\':
2099        {        {
2100        const uschar *save_ptr = ptr;        const uschar *save_ptr = ptr;
2101        c = check_escape(&ptr, errorptr, bracount, options, FALSE);        c = check_escape(&ptr, errorptr, bracount, options, FALSE, &compile_block);
2102        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2103        if (c >= 0)        if (c >= 0)
2104          {          {
# Line 1621  while ((c = *(++ptr)) != 0) Line 2118  while ((c = *(++ptr)) != 0)
2118        int refnum = -c - ESC_REF;        int refnum = -c - ESC_REF;
2119        if (refnum > top_backref) top_backref = refnum;        if (refnum > top_backref) top_backref = refnum;
2120        length++;   /* For single back reference */        length++;   /* For single back reference */
2121        if (ptr[1] == '{' && is_counted_repeat(ptr+2))        if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2122          {          {
2123          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
2124          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2125          if ((min == 0 && (max == 1 || max == -1)) ||          if ((min == 0 && (max == 1 || max == -1)) ||
2126            (min == 1 && max == -1))            (min == 1 && max == -1))
# Line 1647  while ((c = *(++ptr)) != 0) Line 2144  while ((c = *(++ptr)) != 0)
2144      or back reference. */      or back reference. */
2145    
2146      case '{':      case '{':
2147      if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;      if (!is_counted_repeat(ptr+1, &compile_block)) goto NORMAL_CHAR;
2148      ptr = read_repeat_counts(ptr+1, &min, &max, errorptr);      ptr = read_repeat_counts(ptr+1, &min, &max, errorptr, &compile_block);
2149      if (*errorptr != NULL) goto PCRE_ERROR_RETURN;      if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2150      if ((min == 0 && (max == 1 || max == -1)) ||      if ((min == 0 && (max == 1 || max == -1)) ||
2151        (min == 1 && max == -1))        (min == 1 && max == -1))
# Line 1662  while ((c = *(++ptr)) != 0) Line 2159  while ((c = *(++ptr)) != 0)
2159      if (ptr[1] == '?') ptr++;      if (ptr[1] == '?') ptr++;
2160      continue;      continue;
2161    
2162      /* An alternation contains an offset to the next branch or ket. */      /* An alternation contains an offset to the next branch or ket. If any ims
2163        options changed in the previous branch(es), and/or if we are in a
2164        lookbehind assertion, extra space will be needed at the start of the
2165        branch. This is handled by branch_extra. */
2166    
2167      case '|':      case '|':
2168      length += 3;      length += 3 + branch_extra;
2169      continue;      continue;
2170    
2171      /* A character class uses 33 characters. Don't worry about character types      /* A character class uses 33 characters. Don't worry about character types
# Line 1679  while ((c = *(++ptr)) != 0) Line 2180  while ((c = *(++ptr)) != 0)
2180        {        {
2181        if (*ptr == '\\')        if (*ptr == '\\')
2182          {          {
2183          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE);          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE,
2184              &compile_block);
2185          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2186          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
2187          }          }
# Line 1696  while ((c = *(++ptr)) != 0) Line 2198  while ((c = *(++ptr)) != 0)
2198    
2199        /* A repeat needs either 1 or 5 bytes. */        /* A repeat needs either 1 or 5 bytes. */
2200    
2201        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2202          {          {
2203          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
2204          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2205          if ((min == 0 && (max == 1 || max == -1)) ||          if ((min == 0 && (max == 1 || max == -1)) ||
2206            (min == 1 && max == -1))            (min == 1 && max == -1))
# Line 1712  while ((c = *(++ptr)) != 0) Line 2214  while ((c = *(++ptr)) != 0)
2214      /* Brackets may be genuine groups or special things */      /* Brackets may be genuine groups or special things */
2215    
2216      case '(':      case '(':
2217        branch_newextra = 0;
2218    
2219      /* Handle special forms of bracket, which all start (? */      /* Handle special forms of bracket, which all start (? */
2220    
2221      if (ptr[1] == '?') switch (c = ptr[2])      if (ptr[1] == '?')
2222        {        {
2223        /* Skip over comments entirely */        int set, unset;
2224        case '#':        int *optset;
       ptr += 3;  
       while (*ptr != 0 && *ptr != ')') ptr++;  
       if (*ptr == 0)  
         {  
         *errorptr = ERR18;  
         goto PCRE_ERROR_RETURN;  
         }  
       continue;  
2225    
2226        /* Non-referencing groups and lookaheads just move the pointer on, and        switch (c = ptr[2])
2227        then behave like a non-special bracket, except that they don't increment          {
2228        the count of extracting brackets. */          /* Skip over comments entirely */
2229            case '#':
2230        case ':':          ptr += 3;
2231        case '=':          while (*ptr != 0 && *ptr != ')') ptr++;
2232        case '!':          if (*ptr == 0)
2233        ptr += 2;            {
2234        break;            *errorptr = ERR18;
2235              goto PCRE_ERROR_RETURN;
2236              }
2237            continue;
2238    
2239        /* Ditto for the "once only" bracket, allowed only if the extra bit          /* Non-referencing groups and lookaheads just move the pointer on, and
2240        is set. */          then behave like a non-special bracket, except that they don't increment
2241            the count of extracting brackets. Ditto for the "once only" bracket,
2242            which is in Perl from version 5.005. */
2243    
2244        case '>':          case ':':
2245        if ((options & PCRE_EXTRA) != 0)          case '=':
2246          {          case '!':
2247            case '>':
2248          ptr += 2;          ptr += 2;
2249          break;          break;
         }  
       /* Else fall through */  
2250    
2251        /* Else loop setting valid options until ) is met. Anything else is an          /* Lookbehinds are in Perl from version 5.005 */
       error. */  
2252    
2253        default:          case '<':
2254        ptr += 2;          if (ptr[3] == '=' || ptr[3] == '!')
       for (;; ptr++)  
         {  
         if ((c = *ptr) == 'i')  
2255            {            {
2256            options |= PCRE_CASELESS;            ptr += 3;
2257            continue;            branch_newextra = 3;
2258              length += 3;         /* For the first branch */
2259              break;
2260            }            }
2261          else if ((c = *ptr) == 'm')          *errorptr = ERR24;
2262            goto PCRE_ERROR_RETURN;
2263    
2264            /* Conditionals are in Perl from version 5.005. The bracket must either
2265            be followed by a number (for bracket reference) or by an assertion
2266            group. */
2267    
2268            case '(':
2269            if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0)
2270              {
2271              ptr += 4;
2272              length += 2;
2273              while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++;
2274              if (*ptr != ')')
2275                {
2276                *errorptr = ERR26;
2277                goto PCRE_ERROR_RETURN;
2278                }
2279              }
2280            else   /* An assertion must follow */
2281            {            {
2282            options |= PCRE_MULTILINE;            ptr++;   /* Can treat like ':' as far as spacing is concerned */
2283            continue;  
2284              if (ptr[2] != '?' || strchr("=!<", ptr[3]) == NULL)
2285                {
2286                ptr += 2;    /* To get right offset in message */
2287                *errorptr = ERR28;
2288                goto PCRE_ERROR_RETURN;
2289                }
2290            }            }
2291          else if (c == 's')          break;
2292    
2293            /* Else loop checking valid options until ) is met. Anything else is an
2294            error. If we are without any brackets, i.e. at top level, the settings
2295            act as if specified in the options, so massage the options immediately.
2296            This is for backward compatibility with Perl 5.004. */
2297    
2298            default:
2299            set = unset = 0;
2300            optset = &set;
2301            ptr += 2;
2302    
2303            for (;; ptr++)
2304            {            {
2305            options |= PCRE_DOTALL;            c = *ptr;
2306            continue;            switch (c)
2307                {
2308                case 'i':
2309                *optset |= PCRE_CASELESS;
2310                continue;
2311    
2312                case 'm':
2313                *optset |= PCRE_MULTILINE;
2314                continue;
2315    
2316                case 's':
2317                *optset |= PCRE_DOTALL;
2318                continue;
2319    
2320                case 'x':
2321                *optset |= PCRE_EXTENDED;
2322                continue;
2323    
2324                case 'X':
2325                *optset |= PCRE_EXTRA;
2326                continue;
2327    
2328                case 'U':
2329                *optset |= PCRE_UNGREEDY;
2330                continue;
2331    
2332                case '-':
2333                optset = &unset;
2334                continue;
2335    
2336                /* A termination by ')' indicates an options-setting-only item;
2337                this is global at top level; otherwise nothing is done here and
2338                it is handled during the compiling process on a per-bracket-group
2339                basis. */
2340    
2341                case ')':
2342                if (brastackptr == 0)
2343                  {
2344                  options = (options | set) & (~unset);
2345                  set = unset = 0;     /* To save length */
2346                  }
2347                /* Fall through */
2348    
2349                /* A termination by ':' indicates the start of a nested group with
2350                the given options set. This is again handled at compile time, but
2351                we must allow for compiled space if any of the ims options are
2352                set. We also have to allow for resetting space at the end of
2353                the group, which is why 4 is added to the length and not just 2.
2354                If there are several changes of options within the same group, this
2355                will lead to an over-estimate on the length, but this shouldn't
2356                matter very much. We also have to allow for resetting options at
2357                the start of any alternations, which we do by setting
2358                branch_newextra to 2. Finally, we record whether the case-dependent
2359                flag ever changes within the regex. This is used by the "required
2360                character" code. */
2361    
2362                case ':':
2363                if (((set|unset) & PCRE_IMS) != 0)
2364                  {
2365                  length += 4;
2366                  branch_newextra = 2;
2367                  if (((set|unset) & PCRE_CASELESS) != 0) options |= PCRE_ICHANGED;
2368                  }
2369                goto END_OPTIONS;
2370    
2371                /* Unrecognized option character */
2372    
2373                default:
2374                *errorptr = ERR12;
2375                goto PCRE_ERROR_RETURN;
2376                }
2377            }            }
2378          else if (c == 'x')  
2379            /* If we hit a closing bracket, that's it - this is a freestanding
2380            option-setting. We need to ensure that branch_extra is updated if
2381            necessary. The only values branch_newextra can have here are 0 or 2.
2382            If the value is 2, then branch_extra must either be 2 or 5, depending
2383            on whether this is a lookbehind group or not. */
2384    
2385            END_OPTIONS:
2386            if (c == ')')
2387            {            {
2388            options |= PCRE_EXTENDED;            if (branch_newextra == 2 && (branch_extra == 0 || branch_extra == 3))
2389            length -= spaces;          /* Already counted spaces */              branch_extra += branch_newextra;
2390            continue;            continue;
2391            }            }
         else if (c == ')') break;  
2392    
2393          *errorptr = ERR12;          /* If options were terminated by ':' control comes here. Fall through
2394          goto PCRE_ERROR_RETURN;          to handle the group below. */
2395          }          }
       continue;                      /* End of this bracket handling */  
2396        }        }
2397    
2398      /* Extracting brackets must be counted so we can process escapes in a      /* Extracting brackets must be counted so we can process escapes in a
# Line 1791  while ((c = *(++ptr)) != 0) Line 2401  while ((c = *(++ptr)) != 0)
2401      else bracount++;      else bracount++;
2402    
2403      /* Non-special forms of bracket. Save length for computing whole length      /* Non-special forms of bracket. Save length for computing whole length
2404      at end if there's a repeat that requires duplication of the group. */      at end if there's a repeat that requires duplication of the group. Also
2405        save the current value of branch_extra, and start the new group with
2406        the new value. If non-zero, this will either be 2 for a (?imsx: group, or 3
2407        for a lookbehind assertion. */
2408    
2409      if (brastackptr >= sizeof(brastack)/sizeof(int))      if (brastackptr >= sizeof(brastack)/sizeof(int))
2410        {        {
# Line 1799  while ((c = *(++ptr)) != 0) Line 2412  while ((c = *(++ptr)) != 0)
2412        goto PCRE_ERROR_RETURN;        goto PCRE_ERROR_RETURN;
2413        }        }
2414    
2415        bralenstack[brastackptr] = branch_extra;
2416        branch_extra = branch_newextra;
2417    
2418      brastack[brastackptr++] = length;      brastack[brastackptr++] = length;
2419      length += 3;      length += 3;
2420      continue;      continue;
# Line 1806  while ((c = *(++ptr)) != 0) Line 2422  while ((c = *(++ptr)) != 0)
2422      /* Handle ket. Look for subsequent max/min; for certain sets of values we      /* Handle ket. Look for subsequent max/min; for certain sets of values we
2423      have to replicate this bracket up to that many times. If brastackptr is      have to replicate this bracket up to that many times. If brastackptr is
2424      0 this is an unmatched bracket which will generate an error, but take care      0 this is an unmatched bracket which will generate an error, but take care
2425      not to try to access brastack[-1]. */      not to try to access brastack[-1] when computing the length and restoring
2426        the branch_extra value. */
2427    
2428      case ')':      case ')':
2429      length += 3;      length += 3;
2430        {        {
2431        int minval = 1;        int minval = 1;
2432        int maxval = 1;        int maxval = 1;
2433        int duplength = (brastackptr > 0)? length - brastack[--brastackptr] : 0;        int duplength;
2434    
2435          if (brastackptr > 0)
2436            {
2437            duplength = length - brastack[--brastackptr];
2438            branch_extra = bralenstack[brastackptr];
2439            }
2440          else duplength = 0;
2441    
2442        /* Leave ptr at the final char; for read_repeat_counts this happens        /* Leave ptr at the final char; for read_repeat_counts this happens
2443        automatically; for the others we need an increment. */        automatically; for the others we need an increment. */
2444    
2445        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2, &compile_block))
2446          {          {
2447          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr);          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr,
2448              &compile_block);
2449          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2450          }          }
2451        else if (c == '*') { minval = 0; maxval = -1; ptr++; }        else if (c == '*') { minval = 0; maxval = -1; ptr++; }
2452        else if (c == '+') { maxval = -1; ptr++; }        else if (c == '+') { maxval = -1; ptr++; }
2453        else if (c == '?') { minval = 0; ptr++; }        else if (c == '?') { minval = 0; ptr++; }
2454    
2455        /* If there is a minimum > 1 we have to replicate up to minval-1 times;        /* If the minimum is zero, we have to allow for an OP_BRAZERO before the
2456        if there is a limited maximum we have to replicate up to maxval-1 times        group, and if the maximum is greater than zero, we have to replicate
2457        and allow for a BRAZERO item before each optional copy, as we also have        maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting
2458        to do before the first copy if the minimum is zero. */        bracket set - hence the 7. */
2459    
2460        if (minval == 0) length++;        if (minval == 0)
2461          else if (minval > 1) length += (minval - 1) * duplength;          {
2462        if (maxval > minval) length += (maxval - minval) * (duplength + 1);          length++;
2463            if (maxval > 0) length += (maxval - 1) * (duplength + 7);
2464            }
2465    
2466          /* When the minimum is greater than zero, we have to replicate up to
2467          minval-1 times, with no additions required in the copies. Then, if
2468          there is a limited maximum we have to replicate up to maxval-1 times
2469          allowing for a BRAZERO item before each optional copy and nesting
2470          brackets for all but one of the optional copies. */
2471    
2472          else
2473            {
2474            length += (minval - 1) * duplength;
2475            if (maxval > minval)   /* Need this test as maxval=-1 means no limit */
2476              length += (maxval - minval) * (duplength + 7) - 6;
2477            }
2478        }        }
2479      continue;      continue;
2480    
# Line 1849  while ((c = *(++ptr)) != 0) Line 2489  while ((c = *(++ptr)) != 0)
2489      runlength = 0;      runlength = 0;
2490      do      do
2491        {        {
2492        if ((pcre_ctypes[c] & ctype_space) != 0)        if ((options & PCRE_EXTENDED) != 0)
         {  
         if ((options & PCRE_EXTENDED) != 0) continue;  
         spaces++;  
         }  
   
       if (c == '#' && (options & PCRE_EXTENDED) != 0)  
2493          {          {
2494          while ((c = *(++ptr)) != 0 && c != '\n');          if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2495          continue;          if (c == '#')
2496              {
2497              while ((c = *(++ptr)) != 0 && c != '\n');
2498              continue;
2499              }
2500          }          }
2501    
2502        /* Backslash may introduce a data char or a metacharacter; stop the        /* Backslash may introduce a data char or a metacharacter; stop the
# Line 1867  while ((c = *(++ptr)) != 0) Line 2505  while ((c = *(++ptr)) != 0)
2505        if (c == '\\')        if (c == '\\')
2506          {          {
2507          const uschar *saveptr = ptr;          const uschar *saveptr = ptr;
2508          c = check_escape(&ptr, errorptr, bracount, options, FALSE);          c = check_escape(&ptr, errorptr, bracount, options, FALSE,
2509              &compile_block);
2510          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2511          if (c < 0) { ptr = saveptr; break; }          if (c < 0) { ptr = saveptr; break; }
2512          }          }
# Line 1879  while ((c = *(++ptr)) != 0) Line 2518  while ((c = *(++ptr)) != 0)
2518    
2519      /* This "while" is the end of the "do" above. */      /* This "while" is the end of the "do" above. */
2520    
2521      while (runlength < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);      while (runlength < 255 &&
2522          (compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0);
2523    
2524      ptr--;      ptr--;
2525      length += runlength;      length += runlength;
# Line 1914  if (re == NULL) Line 2554  if (re == NULL)
2554    
2555  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
2556  re->options = options;  re->options = options;
2557    re->tables = tables;
2558    
2559  /* Set up a starting, non-extracting bracket, then compile the expression. On  /* Set up a starting, non-extracting bracket, then compile the expression. On
2560  error, *errorptr will be set non-NULL, so we don't need to look at the result  error, *errorptr will be set non-NULL, so we don't need to look at the result
# Line 1923  ptr = (const uschar *)pattern; Line 2564  ptr = (const uschar *)pattern;
2564  code = re->code;  code = re->code;
2565  *code = OP_BRA;  *code = OP_BRA;
2566  bracount = 0;  bracount = 0;
2567  (void)compile_regex(options, &bracount, &code, &ptr, errorptr);  (void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1,
2568      &reqchar, &countlits, &compile_block);
2569  re->top_bracket = bracount;  re->top_bracket = bracount;
2570  re->top_backref = top_backref;  re->top_backref = top_backref;
2571    
# Line 1940  if debugging, leave the test till after Line 2582  if debugging, leave the test till after
2582  if (code - re->code > length) *errorptr = ERR23;  if (code - re->code > length) *errorptr = ERR23;
2583  #endif  #endif
2584    
2585    /* Give an error if there's a back reference to a non-existent capturing
2586    subpattern. */
2587    
2588    if (top_backref > re->top_bracket) *errorptr = ERR15;
2589    
2590  /* Failed to compile */  /* Failed to compile */
2591    
2592  if (*errorptr != NULL)  if (*errorptr != NULL)
# Line 1950  if (*errorptr != NULL) Line 2597  if (*errorptr != NULL)
2597    return NULL;    return NULL;
2598    }    }
2599    
2600  /* If the anchored option was not passed, set flag if we can determine that it  /* If the anchored option was not passed, set flag if we can determine that the
2601  is anchored by virtue of ^ characters or \A or anything else. Otherwise, see if  pattern is anchored by virtue of ^ characters or \A or anything else (such as
2602  we can determine what the first character has to be, because that speeds up  starting with .* when DOTALL is set).
2603  unanchored matches no end. In the case of multiline matches, an alternative is  
2604  to set the PCRE_STARTLINE flag if all branches start with ^. */  Otherwise, see if we can determine what the first character has to be, because
2605    that speeds up unanchored matches no end. If not, see if we can set the
2606    PCRE_STARTLINE flag. This is helpful for multiline matches when all branches
2607    start with ^, and also when all branches start with .* for non-DOTALL matches.
2608    */
2609    
2610  if ((options & PCRE_ANCHORED) == 0)  if ((options & PCRE_ANCHORED) == 0)
2611    {    {
2612    if (is_anchored(re->code, (options & PCRE_MULTILINE) != 0))    int temp_options = options;
2613      if (is_anchored(re->code, &temp_options))
2614      re->options |= PCRE_ANCHORED;      re->options |= PCRE_ANCHORED;
2615    else    else
2616      {      {
2617      int ch = find_firstchar(re->code);      int ch = find_firstchar(re->code, &temp_options);
2618      if (ch >= 0)      if (ch >= 0)
2619        {        {
2620        re->first_char = ch;        re->first_char = ch;
# Line 1973  if ((options & PCRE_ANCHORED) == 0) Line 2625  if ((options & PCRE_ANCHORED) == 0)
2625      }      }
2626    }    }
2627    
2628    /* Save the last required character if there are at least two literal
2629    characters on all paths, or if there is no first character setting. */
2630    
2631    if (reqchar >= 0 && (countlits > 1 || (re->options & PCRE_FIRSTSET) == 0))
2632      {
2633      re->req_char = reqchar;
2634      re->options |= PCRE_REQCHSET;
2635      }
2636    
2637  /* Print out the compiled data for debugging */  /* Print out the compiled data for debugging */
2638    
2639  #ifdef DEBUG  #ifdef DEBUG
2640    
2641  printf("Length = %d top_bracket = %d top_backref=%d\n",  printf("Length = %d top_bracket = %d top_backref = %d\n",
2642    length, re->top_bracket, re->top_backref);    length, re->top_bracket, re->top_backref);
2643    
2644  if (re->options != 0)  if (re->options != 0)
2645    {    {
2646    printf("%s%s%s%s%s%s%s\n",    printf("%s%s%s%s%s%s%s%s%s\n",
2647      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
2648      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
2649        ((re->options & PCRE_ICHANGED) != 0)? "case state changed " : "",
2650      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
2651      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
2652      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
2653      ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",      ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
2654      ((re->options & PCRE_EXTRA) != 0)? "extra " : "");      ((re->options & PCRE_EXTRA) != 0)? "extra " : "",
2655        ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");
2656    }    }
2657    
2658  if ((re->options & PCRE_FIRSTSET) != 0)  if ((re->options & PCRE_FIRSTSET) != 0)
# Line 1998  if ((re->options & PCRE_FIRSTSET) != 0) Line 2661  if ((re->options & PCRE_FIRSTSET) != 0)
2661      else printf("First char = \\x%02x\n", re->first_char);      else printf("First char = \\x%02x\n", re->first_char);
2662    }    }
2663    
2664    if ((re->options & PCRE_REQCHSET) != 0)
2665      {
2666      if (isprint(re->req_char)) printf("Req char = %c\n", re->req_char);
2667        else printf("Req char = \\x%02x\n", re->req_char);
2668      }
2669    
2670  code_end = code;  code_end = code;
2671  code_base = code = re->code;  code_base = code = re->code;
2672    
# Line 2015  while (code < code_end) Line 2684  while (code < code_end)
2684    
2685    else switch(*code)    else switch(*code)
2686      {      {
2687        case OP_OPT:
2688        printf(" %.2x %s", code[1], OP_names[*code]);
2689        code++;
2690        break;
2691    
2692        case OP_COND:
2693        printf("%3d Cond", (code[1] << 8) + code[2]);
2694        code += 2;
2695        break;
2696    
2697        case OP_CREF:
2698        printf(" %.2d %s", code[1], OP_names[*code]);
2699        code++;
2700        break;
2701    
2702      case OP_CHARS:      case OP_CHARS:
2703      charlength = *(++code);      charlength = *(++code);
2704      printf("%3d ", charlength);      printf("%3d ", charlength);
# Line 2028  while (code < code_end) Line 2712  while (code < code_end)
2712      case OP_KET:      case OP_KET:
2713      case OP_ASSERT:      case OP_ASSERT:
2714      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
2715        case OP_ASSERTBACK:
2716        case OP_ASSERTBACK_NOT:
2717      case OP_ONCE:      case OP_ONCE:
2718      printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);      printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
2719      code += 2;      code += 2;
2720      break;      break;
2721    
2722        case OP_REVERSE:
2723        printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
2724        code += 2;
2725        break;
2726    
2727      case OP_STAR:      case OP_STAR:
2728      case OP_MINSTAR:      case OP_MINSTAR:
2729      case OP_PLUS:      case OP_PLUS:
# Line 2106  while (code < code_end) Line 2797  while (code < code_end)
2797      goto CLASS_REF_REPEAT;      goto CLASS_REF_REPEAT;
2798    
2799      case OP_CLASS:      case OP_CLASS:
     case OP_NEGCLASS:  
2800        {        {
2801        int i, min, max;        int i, min, max;
2802          code++;
2803        if (*code++ == OP_CLASS) printf("    [");        printf("    [");
         else printf("   ^[");  
2804    
2805        for (i = 0; i < 256; i++)        for (i = 0; i < 256; i++)
2806          {          {
# Line 2193  return (pcre *)re; Line 2882  return (pcre *)re;
2882    
2883    
2884  /*************************************************  /*************************************************
 *        Match a character type                  *  
 *************************************************/  
   
 /* Not used in all the places it might be as it's sometimes faster  
 to put the code inline.  
   
 Arguments:  
   type        the character type  
   c           the character  
   dotall      the dotall flag  
   
 Returns:      TRUE if character is of the type  
 */  
   
 static BOOL  
 match_type(int type, int c, BOOL dotall)  
 {  
   
 #ifdef DEBUG  
 if (isprint(c)) printf("matching subject %c against ", c);  
   else printf("matching subject \\x%02x against ", c);  
 printf("%s\n", OP_names[type]);  
 #endif  
   
 switch(type)  
   {  
   case OP_ANY:            return dotall || c != '\n';  
   case OP_NOT_DIGIT:      return (pcre_ctypes[c] & ctype_digit) == 0;  
   case OP_DIGIT:          return (pcre_ctypes[c] & ctype_digit) != 0;  
   case OP_NOT_WHITESPACE: return (pcre_ctypes[c] & ctype_space) == 0;  
   case OP_WHITESPACE:     return (pcre_ctypes[c] & ctype_space) != 0;  
   case OP_NOT_WORDCHAR:   return (pcre_ctypes[c] & ctype_word) == 0;  
   case OP_WORDCHAR:       return (pcre_ctypes[c] & ctype_word) != 0;  
   }  
 return FALSE;  
 }  
   
   
   
 /*************************************************  
2885  *          Match a back-reference                *  *          Match a back-reference                *
2886  *************************************************/  *************************************************/
2887    
2888  /* If a back reference hasn't been set, the match fails.  /* If a back reference hasn't been set, the length that is passed is greater
2889    than the number of characters left in the string, so the match fails.
2890    
2891  Arguments:  Arguments:
2892    number      reference number    offset      index into the offset vector
2893    eptr        points into the subject    eptr        points into the subject
2894    length      length to be matched    length      length to be matched
2895    md          points to match data block    md          points to match data block
2896      ims         the ims flags
2897    
2898  Returns:      TRUE if matched  Returns:      TRUE if matched
2899  */  */
2900    
2901  static BOOL  static BOOL
2902  match_ref(int number, register const uschar *eptr, int length, match_data *md)  match_ref(int offset, register const uschar *eptr, int length, match_data *md,
2903      unsigned long int ims)
2904  {  {
2905  const uschar *p = md->start_subject + md->offset_vector[number];  const uschar *p = md->start_subject + md->offset_vector[offset];
2906    
2907  #ifdef DEBUG  #ifdef DEBUG
2908  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 2267  printf("\n"); Line 2919  printf("\n");
2919    
2920  /* Always fail if not enough characters left */  /* Always fail if not enough characters left */
2921    
2922  if (length > md->end_subject - p) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
2923    
2924  /* Separate the caseless case for speed */  /* Separate the caseless case for speed */
2925    
2926  if (md->caseless)  if ((ims & PCRE_CASELESS) != 0)
2927    { while (length-- > 0) if (pcre_lcc[*p++] != pcre_lcc[*eptr++]) return FALSE; }    {
2928      while (length-- > 0)
2929        if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
2930      }
2931  else  else
2932    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
2933    
# Line 2285  return TRUE; Line 2940  return TRUE;
2940  *         Match from current position            *  *         Match from current position            *
2941  *************************************************/  *************************************************/
2942    
2943  /* On entry ecode points to the first opcode, and eptr to the first character.  /* On entry ecode points to the first opcode, and eptr to the first character
2944    in the subject string, while eptrb holds the value of eptr at the start of the
2945    last bracketed group - used for breaking infinite loops matching zero-length
2946    strings.
2947    
2948  Arguments:  Arguments:
2949     eptr        pointer in subject     eptr        pointer in subject
2950     ecode       position in code     ecode       position in code
2951     offset_top  current top pointer     offset_top  current top pointer
2952     md          pointer to "static" info for the match     md          pointer to "static" info for the match
2953       ims         current /i, /m, and /s options
2954       condassert  TRUE if called to check a condition assertion
2955       eptrb       eptr at start of last bracket
2956    
2957  Returns:       TRUE if matched  Returns:       TRUE if matched
2958  */  */
2959    
2960  static BOOL  static BOOL
2961  match(register const uschar *eptr, register const uschar *ecode, int offset_top,  match(register const uschar *eptr, register const uschar *ecode,
2962    match_data *md)    int offset_top, match_data *md, unsigned long int ims, BOOL condassert,
2963      const uschar *eptrb)
2964  {  {
2965    unsigned long int original_ims = ims;   /* Save for resetting on ')' */
2966    
2967  for (;;)  for (;;)
2968    {    {
2969      int op = (int)*ecode;
2970    int min, max, ctype;    int min, max, ctype;
2971    register int i;    register int i;
2972    register int c;    register int c;
2973    BOOL minimize = FALSE;    BOOL minimize = FALSE;
2974    
2975    /* Opening bracket. Check the alternative branches in turn, failing if none    /* Opening capturing bracket. If there is space in the offset vector, save
2976    match. We have to set the start offset if required and there is space    the current subject position in the working slot at the top of the vector. We
2977    in the offset vector so that it is available for subsequent back references    mustn't change the current values of the data slot, because they may be set
2978    if the bracket matches. However, if the bracket fails, we must put back the    from a previous iteration of this group, and be referred to by a reference
2979    previous value of both offsets in case they were set by a previous copy of    inside the group.
2980    the same bracket. Don't worry about setting the flag for the error case here;  
2981    that is handled in the code for KET. */    If the bracket fails to match, we need to restore this value and also the
2982      values of the final offsets, in case they were set by a previous iteration of
2983      the same bracket.
2984    
2985      If there isn't enough space in the offset vector, treat this as if it were a
2986      non-capturing bracket. Don't worry about setting the flag for the error case
2987      here; that is handled in the code for KET. */
2988    
2989    if ((int)*ecode >= OP_BRA)    if (op > OP_BRA)
2990      {      {
2991      int number = (*ecode - OP_BRA) << 1;      int number = op - OP_BRA;
2992      int save_offset1 = 0, save_offset2 = 0;      int offset = number << 1;
2993    
2994      DPRINTF(("start bracket %d\n", number/2));  #ifdef DEBUG
2995        printf("start bracket %d subject=", number);
2996        pchars(eptr, 16, TRUE, md);
2997        printf("\n");
2998    #endif
2999    
3000      if (number > 0 && number < md->offset_end)      if (offset < md->offset_max)
3001        {        {
3002        save_offset1 = md->offset_vector[number];        int save_offset1 = md->offset_vector[offset];
3003        save_offset2 = md->offset_vector[number+1];        int save_offset2 = md->offset_vector[offset+1];
3004        md->offset_vector[number] = eptr - md->start_subject;        int save_offset3 = md->offset_vector[md->offset_end - number];
3005    
3006          DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
3007          md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
3008    
3009          do
3010            {
3011            if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3012            ecode += (ecode[1] << 8) + ecode[2];
3013            }
3014          while (*ecode == OP_ALT);
3015    
3016        DPRINTF(("saving %d %d\n", save_offset1, save_offset2));        DPRINTF(("bracket %d failed\n", number));
3017    
3018          md->offset_vector[offset] = save_offset1;
3019          md->offset_vector[offset+1] = save_offset2;
3020          md->offset_vector[md->offset_end - number] = save_offset3;
3021          return FALSE;
3022        }        }
3023    
3024      /* Recurse for all the alternatives. */      /* Insufficient room for saving captured contents */
3025    
3026        else op = OP_BRA;
3027        }
3028    
3029      /* Other types of node can be handled by a switch */
3030    
3031      switch(op)
3032        {
3033        case OP_BRA:     /* Non-capturing bracket: optimized */
3034        DPRINTF(("start bracket 0\n"));
3035      do      do
3036        {        {
3037        if (match(eptr, ecode+3, offset_top, md)) return TRUE;        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3038        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
3039        }        }
3040      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
3041        DPRINTF(("bracket 0 failed\n"));
3042        return FALSE;
3043    
3044      DPRINTF(("bracket %d failed\n", number/2));      /* Conditional group: compilation checked that there are no more than
3045        two branches. If the condition is false, skipping the first branch takes us
3046        past the end if there is only one branch, but that's OK because that is
3047        exactly what going to the ket would do. */
3048    
3049        case OP_COND:
3050        if (ecode[3] == OP_CREF)         /* Condition is extraction test */
3051          {
3052          int offset = ecode[4] << 1;    /* Doubled reference number */
3053          return match(eptr,
3054            ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)?
3055              5 : 3 + (ecode[1] << 8) + ecode[2]),
3056            offset_top, md, ims, FALSE, eptr);
3057          }
3058    
3059        /* The condition is an assertion. Call match() to evaluate it - setting
3060        the condassert argument TRUE causes it to stop at the end of an assertion. */
3061    
3062      if (number > 0 && number < md->offset_end)      else
3063        {        {
3064        md->offset_vector[number] = save_offset1;        if (match(eptr, ecode+3, offset_top, md, ims, TRUE, NULL))
3065        md->offset_vector[number+1] = save_offset2;          {
3066            ecode += 3 + (ecode[4] << 8) + ecode[5];
3067            while (*ecode == OP_ALT) ecode += (ecode[1] << 8) + ecode[2];
3068            }
3069          else ecode += (ecode[1] << 8) + ecode[2];
3070          return match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr);
3071        }        }
3072        /* Control never reaches here */
3073    
3074      return FALSE;      /* Skip over conditional reference data if encountered (should not be) */
     }  
3075    
3076    /* Other types of node can be handled by a switch */      case OP_CREF:
3077        ecode += 2;
3078        break;
3079    
3080        /* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched
3081        an empty string - recursion will then try other alternatives, if any. */
3082    
   switch(*ecode)  
     {  
3083      case OP_END:      case OP_END:
3084        if (md->notempty && eptr == md->start_match) return FALSE;
3085      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;          /* Record where we ended */
3086      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;   /* and how many extracts were taken */
3087      return TRUE;      return TRUE;
3088    
3089      /* The equivalent of Prolog's "cut" - if the rest doesn't match, the      /* Change option settings */
     whole thing doesn't match, so we have to get out via a longjmp(). */  
3090    
3091      case OP_CUT:      case OP_OPT:
3092      if (match(eptr, ecode+1, offset_top, md)) return TRUE;      ims = ecode[1];
3093      longjmp(md->fail_env, 1);      ecode += 2;
3094        DPRINTF(("ims set to %02lx\n", ims));
3095        break;
3096    
3097      /* Assertion brackets. Check the alternative branches in turn - the      /* Assertion brackets. Check the alternative branches in turn - the
3098      matching won't pass the KET for an assertion. If any one branch matches,      matching won't pass the KET for an assertion. If any one branch matches,
3099      the assertion is true. */      the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
3100        start of each branch to move the current point backwards, so the code at
3101        this level is identical to the lookahead case. */
3102    
3103      case OP_ASSERT:      case OP_ASSERT:
3104        case OP_ASSERTBACK:
3105      do      do
3106        {        {
3107        if (match(eptr, ecode+3, offset_top, md)) break;        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, NULL)) break;
3108        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
3109        }        }
3110      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
3111      if (*ecode == OP_KET) return FALSE;      if (*ecode == OP_KET) return FALSE;
3112    
3113        /* If checking an assertion for a condition, return TRUE. */
3114    
3115        if (condassert) return TRUE;
3116    
3117      /* Continue from after the assertion, updating the offsets high water      /* Continue from after the assertion, updating the offsets high water
3118      mark, since extracts may have been taken during the assertion. */      mark, since extracts may have been taken during the assertion. */
3119    
# Line 2391  for (;;) Line 3125  for (;;)
3125      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match */
3126    
3127      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
3128        case OP_ASSERTBACK_NOT:
3129      do      do
3130        {        {
3131        if (match(eptr, ecode+3, offset_top, md)) return FALSE;        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, NULL)) return FALSE;
3132        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
3133        }        }
3134      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
3135    
3136        if (condassert) return TRUE;
3137      ecode += 3;      ecode += 3;
3138      continue;      continue;
3139    
3140        /* Move the subject pointer back. This occurs only at the start of
3141        each branch of a lookbehind assertion. If we are too close to the start to
3142        move back, this match function fails. */
3143    
3144        case OP_REVERSE:
3145        eptr -= (ecode[1] << 8) + ecode[2];
3146        if (eptr < md->start_subject) return FALSE;
3147        ecode += 3;
3148        break;
3149    
3150    
3151      /* "Once" brackets are like assertion brackets except that after a match,      /* "Once" brackets are like assertion brackets except that after a match,
3152      the point in the subject string is not moved back. Thus there can never be      the point in the subject string is not moved back. Thus there can never be
3153      a move back into the brackets. Check the alternative branches in turn - the      a move back into the brackets. Check the alternative branches in turn - the
3154      matching won't pass the KET for this kind of subpattern. If any one branch      matching won't pass the KET for this kind of subpattern. If any one branch
3155      matches, we carry on, leaving the subject pointer. */      matches, we carry on as at the end of a normal bracket, leaving the subject
3156        pointer. */
3157    
3158      case OP_ONCE:      case OP_ONCE:
     do  
3159        {        {
3160        if (match(eptr, ecode+3, offset_top, md)) break;        const uschar *prev = ecode;
       ecode += (ecode[1] << 8) + ecode[2];  
       }  
     while (*ecode == OP_ALT);  
     if (*ecode == OP_KET) return FALSE;  
3161    
3162      /* Continue as from after the assertion, updating the offsets high water        do
3163      mark, since extracts may have been taken. */          {
3164            if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) break;
3165            ecode += (ecode[1] << 8) + ecode[2];
3166            }
3167          while (*ecode == OP_ALT);
3168    
3169      do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);        /* If hit the end of the group (which could be repeated), fail */
3170      ecode += 3;  
3171      offset_top = md->end_offset_top;        if (*ecode != OP_ONCE && *ecode != OP_ALT) return FALSE;
3172      eptr = md->end_match_ptr;  
3173      continue;        /* Continue as from after the assertion, updating the offsets high water
3174          mark, since extracts may have been taken. */
3175    
3176          do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
3177    
3178          offset_top = md->end_offset_top;
3179          eptr = md->end_match_ptr;
3180    
3181          /* For a non-repeating ket, just continue at this level. This also
3182          happens for a repeating ket if no characters were matched in the group.
3183          This is the forcible breaking of infinite loops as implemented in Perl
3184          5.005. If there is an options reset, it will get obeyed in the normal
3185          course of events. */
3186    
3187          if (*ecode == OP_KET || eptr == eptrb)
3188            {
3189            ecode += 3;
3190            break;
3191            }
3192    
3193          /* The repeating kets try the rest of the pattern or restart from the
3194          preceding bracket, in the appropriate order. We need to reset any options
3195          that changed within the bracket before re-running it, so check the next
3196          opcode. */
3197    
3198          if (ecode[3] == OP_OPT)
3199            {
3200            ims = (ims & ~PCRE_IMS) | ecode[4];
3201            DPRINTF(("ims set to %02lx at group repeat\n", ims));
3202            }
3203    
3204          if (*ecode == OP_KETRMIN)
3205            {
3206            if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr) ||
3207                match(eptr, prev, offset_top, md, ims, FALSE, eptr)) return TRUE;
3208            }
3209          else  /* OP_KETRMAX */
3210            {
3211            if (match(eptr, prev, offset_top, md, ims, FALSE, eptr) ||
3212                match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3213            }
3214          }
3215        return FALSE;
3216    
3217      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
3218      bracketed group and go to there. */      bracketed group and go to there. */
# Line 2440  for (;;) Line 3230  for (;;)
3230      case OP_BRAZERO:      case OP_BRAZERO:
3231        {        {
3232        const uschar *next = ecode+1;        const uschar *next = ecode+1;
3233        if (match(eptr, next, offset_top, md)) return TRUE;        if (match(eptr, next, offset_top, md, ims, FALSE, eptr)) return TRUE;
3234        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3235        ecode = next + 3;        ecode = next + 3;
3236        }        }
# Line 2450  for (;;) Line 3240  for (;;)
3240        {        {
3241        const uschar *next = ecode+1;        const uschar *next = ecode+1;
3242        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3243        if (match(eptr, next+3, offset_top, md)) return TRUE;        if (match(eptr, next+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3244        ecode++;        ecode++;
3245        }        }
3246      break;;      break;
3247    
3248      /* End of a group, repeated or non-repeating. If we are at the end of      /* End of a group, repeated or non-repeating. If we are at the end of
3249      an assertion "group", stop matching and return TRUE, but record the      an assertion "group", stop matching and return TRUE, but record the
3250      current high water mark for use by positive assertions. */      current high water mark for use by positive assertions. Do this also
3251        for the "once" (not-backing-up) groups. */
3252    
3253      case OP_KET:      case OP_KET:
3254      case OP_KETRMIN:      case OP_KETRMIN:
3255      case OP_KETRMAX:      case OP_KETRMAX:
3256        {        {
       int number;  
3257        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
3258    
3259        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
3260              *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
3261              *prev == OP_ONCE)
3262          {          {
3263          md->end_match_ptr = eptr;      /* For ONCE */          md->end_match_ptr = eptr;      /* For ONCE */
3264          md->end_offset_top = offset_top;          md->end_offset_top = offset_top;
3265          return TRUE;          return TRUE;
3266          }          }
3267    
3268        /* In all other cases we have to check the group number back at the        /* In all other cases except a conditional group we have to check the
3269        start and if necessary complete handling an extraction by setting the        group number back at the start and if necessary complete handling an
3270        final offset and bumping the high water mark. */        extraction by setting the offsets and bumping the high water mark. */
3271    
3272        number = (*prev - OP_BRA) << 1;        if (*prev != OP_COND)
3273            {
3274            int number = *prev - OP_BRA;
3275            int offset = number << 1;
3276    
3277        DPRINTF(("end bracket %d\n", number/2));          DPRINTF(("end bracket %d\n", number));
3278    
3279        if (number > 0)          if (number > 0)
         {  
         if (number >= md->offset_end) md->offset_overflow = TRUE; else  
3280            {            {
3281            md->offset_vector[number+1] = eptr - md->start_subject;            if (offset >= md->offset_max) md->offset_overflow = TRUE; else
3282            if (offset_top <= number) offset_top = number + 2;              {
3283                md->offset_vector[offset] =
3284                  md->offset_vector[md->offset_end - number];
3285                md->offset_vector[offset+1] = eptr - md->start_subject;
3286                if (offset_top <= offset) offset_top = offset + 2;
3287                }
3288            }            }
3289          }          }
3290    
3291        /* For a non-repeating ket, just advance to the next node and continue at        /* Reset the value of the ims flags, in case they got changed during
3292        this level. */        the group. */
3293    
3294          ims = original_ims;
3295          DPRINTF(("ims reset to %02lx\n", ims));
3296    
3297        if (*ecode == OP_KET)        /* For a non-repeating ket, just continue at this level. This also
3298          happens for a repeating ket if no characters were matched in the group.
3299          This is the forcible breaking of infinite loops as implemented in Perl
3300          5.005. If there is an options reset, it will get obeyed in the normal
3301          course of events. */
3302    
3303          if (*ecode == OP_KET || eptr == eptrb)
3304          {          {
3305          ecode += 3;          ecode += 3;
3306          break;          break;
# Line 2504  for (;;) Line 3311  for (;;)
3311    
3312        if (*ecode == OP_KETRMIN)        if (*ecode == OP_KETRMIN)
3313          {          {
3314          if (match(eptr, ecode+3, offset_top, md) ||          if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr) ||
3315              match(eptr, prev, offset_top, md)) return TRUE;              match(eptr, prev, offset_top, md, ims, FALSE, eptr)) return TRUE;
3316          }          }
3317        else  /* OP_KETRMAX */        else  /* OP_KETRMAX */
3318          {          {
3319          if (match(eptr, prev, offset_top, md) ||          if (match(eptr, prev, offset_top, md, ims, FALSE, eptr) ||
3320              match(eptr, ecode+3, offset_top, md)) return TRUE;              match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3321          }          }
3322        }        }
3323      return FALSE;      return FALSE;
# Line 2519  for (;;) Line 3326  for (;;)
3326    
3327      case OP_CIRC:      case OP_CIRC:
3328      if (md->notbol && eptr == md->start_subject) return FALSE;      if (md->notbol && eptr == md->start_subject) return FALSE;
3329      if (md->multiline)      if ((ims & PCRE_MULTILINE) != 0)
3330        {        {
3331        if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;        if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;
3332        ecode++;        ecode++;
# Line 2534  for (;;) Line 3341  for (;;)
3341      ecode++;      ecode++;
3342      break;      break;
3343    
3344      /* Assert before internal newline if multiline, or before      /* Assert before internal newline if multiline, or before a terminating
3345      a terminating newline unless endonly is set, else end of subject unless      newline unless endonly is set, else end of subject unless noteol is set. */
     noteol is set. */  
3346    
3347      case OP_DOLL:      case OP_DOLL:
3348      if (md->noteol && eptr >= md->end_subject) return FALSE;      if ((ims & PCRE_MULTILINE) != 0)
     if (md->multiline)  
3349        {        {
3350        if (eptr < md->end_subject && *eptr != '\n') return FALSE;        if (eptr < md->end_subject) { if (*eptr != '\n') return FALSE; }
3351            else { if (md->noteol) return FALSE; }
3352        ecode++;        ecode++;
3353        break;        break;
3354        }        }
3355      else if (!md->endonly)      else
3356        {        {
3357        if (eptr < md->end_subject - 1 ||        if (md->noteol) return FALSE;
3358           (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;        if (!md->endonly)
3359        ecode++;          {
3360        break;          if (eptr < md->end_subject - 1 ||
3361               (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
3362    
3363            ecode++;
3364            break;
3365            }
3366        }        }
3367      /* ... else fall through */      /* ... else fall through */
3368    
3369      /* End of subject assertion */      /* End of subject assertion (\z) */
3370    
3371      case OP_EOD:      case OP_EOD:
3372      if (eptr < md->end_subject) return FALSE;      if (eptr < md->end_subject) return FALSE;
3373      ecode++;      ecode++;
3374      break;      break;
3375    
3376        /* End of subject or ending \n assertion (\Z) */
3377    
3378        case OP_EODN:
3379        if (eptr < md->end_subject - 1 ||
3380           (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
3381        ecode++;
3382        break;
3383    
3384      /* Word boundary assertions */      /* Word boundary assertions */
3385    
3386      case OP_NOT_WORD_BOUNDARY:      case OP_NOT_WORD_BOUNDARY:
3387      case OP_WORD_BOUNDARY:      case OP_WORD_BOUNDARY:
3388        {        {
3389        BOOL prev_is_word = (eptr != md->start_subject) &&        BOOL prev_is_word = (eptr != md->start_subject) &&
3390          ((pcre_ctypes[eptr[-1]] & ctype_word) != 0);          ((md->ctypes[eptr[-1]] & ctype_word) != 0);
3391        BOOL cur_is_word = (eptr < md->end_subject) &&        BOOL cur_is_word = (eptr < md->end_subject) &&
3392          ((pcre_ctypes[*eptr] & ctype_word) != 0);          ((md->ctypes[*eptr] & ctype_word) != 0);
3393        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
3394             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
3395          return FALSE;          return FALSE;
# Line 2580  for (;;) Line 3399  for (;;)
3399      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
3400    
3401      case OP_ANY:      case OP_ANY:
3402      if (!md->dotall && eptr < md->end_subject && *eptr == '\n') return FALSE;      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n')
3403          return FALSE;
3404      if (eptr++ >= md->end_subject) return FALSE;      if (eptr++ >= md->end_subject) return FALSE;
3405      ecode++;      ecode++;
3406      break;      break;
3407    
3408      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
3409      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) != 0)      if (eptr >= md->end_subject ||
3410           (md->ctypes[*eptr++] & ctype_digit) != 0)
3411        return FALSE;        return FALSE;
3412      ecode++;      ecode++;
3413      break;      break;
3414    
3415      case OP_DIGIT:      case OP_DIGIT:
3416      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) == 0)      if (eptr >= md->end_subject ||
3417           (md->ctypes[*eptr++] & ctype_digit) == 0)
3418        return FALSE;        return FALSE;
3419      ecode++;      ecode++;
3420      break;      break;
3421    
3422      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
3423      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) != 0)      if (eptr >= md->end_subject ||
3424           (md->ctypes[*eptr++] & ctype_space) != 0)
3425        return FALSE;        return FALSE;
3426      ecode++;      ecode++;
3427      break;      break;
3428    
3429      case OP_WHITESPACE:      case OP_WHITESPACE:
3430      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) == 0)      if (eptr >= md->end_subject ||
3431           (md->ctypes[*eptr++] & ctype_space) == 0)
3432        return FALSE;        return FALSE;
3433      ecode++;      ecode++;
3434      break;      break;
3435    
3436      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
3437      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) != 0)      if (eptr >= md->end_subject ||
3438           (md->ctypes[*eptr++] & ctype_word) != 0)
3439        return FALSE;        return FALSE;
3440      ecode++;      ecode++;
3441      break;      break;
3442    
3443      case OP_WORDCHAR:      case OP_WORDCHAR:
3444      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) == 0)      if (eptr >= md->end_subject ||
3445           (md->ctypes[*eptr++] & ctype_word) == 0)
3446        return FALSE;        return FALSE;
3447      ecode++;      ecode++;
3448      break;      break;
# Line 2632  for (;;) Line 3458  for (;;)
3458      case OP_REF:      case OP_REF:
3459        {        {
3460        int length;        int length;
3461        int number = ecode[1] << 1;                /* Doubled reference number */        int offset = ecode[1] << 1;                /* Doubled reference number */
3462        ecode += 2;                                /* Advance past the item */        ecode += 2;                                /* Advance past the item */
3463    
3464        if (number >= offset_top || md->offset_vector[number] < 0)        /* If the reference is unset, set the length to be longer than the amount
3465          {        of subject left; this ensures that every attempt at a match fails. We
3466          md->errorcode = PCRE_ERROR_BADREF;        can't just fail here, because of the possibility of quantifiers with zero
3467          return FALSE;        minima. */
3468          }  
3469          length = (offset >= offset_top || md->offset_vector[offset] < 0)?
3470            md->end_subject - eptr + 1 :
3471            md->offset_vector[offset+1] - md->offset_vector[offset];
3472    
3473        length = md->offset_vector[number+1] - md->offset_vector[number];        /* Set up for repetition, or handle the non-repeated case */
3474    
3475        switch (*ecode)        switch (*ecode)
3476          {          {
# Line 2668  for (;;) Line 3497  for (;;)
3497          break;          break;
3498    
3499          default:               /* No repeat follows */          default:               /* No repeat follows */
3500          if (!match_ref(number, eptr, length, md)) return FALSE;          if (!match_ref(offset, eptr, length, md, ims)) return FALSE;
3501          eptr += length;          eptr += length;
3502          continue;              /* With the main loop */          continue;              /* With the main loop */
3503          }          }
# Line 2684  for (;;) Line 3513  for (;;)
3513    
3514        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3515          {          {
3516          if (!match_ref(number, eptr, length, md)) return FALSE;          if (!match_ref(offset, eptr, length, md, ims)) return FALSE;
3517          eptr += length;          eptr += length;
3518          }          }
3519    
# Line 2699  for (;;) Line 3528  for (;;)
3528          {          {
3529          for (i = min;; i++)          for (i = min;; i++)
3530            {            {
3531            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3532            if (i >= max || !match_ref(number, eptr, length, md))              return TRUE;
3533              if (i >= max || !match_ref(offset, eptr, length, md, ims))
3534              return FALSE;              return FALSE;
3535            eptr += length;            eptr += length;
3536            }            }
# Line 2714  for (;;) Line 3544  for (;;)
3544          const uschar *pp = eptr;          const uschar *pp = eptr;
3545          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3546            {            {
3547            if (!match_ref(number, eptr, length, md)) break;            if (!match_ref(offset, eptr, length, md, ims)) break;
3548            eptr += length;            eptr += length;
3549            }            }
3550          while (eptr >= pp)          while (eptr >= pp)
3551            {            {
3552            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3553                return TRUE;
3554            eptr -= length;            eptr -= length;
3555            }            }
3556          return FALSE;          return FALSE;
# Line 2727  for (;;) Line 3558  for (;;)
3558        }        }
3559      /* Control never gets here */      /* Control never gets here */
3560    
3561    
3562    
3563      /* Match a character class, possibly repeatedly. Look past the end of the      /* Match a character class, possibly repeatedly. Look past the end of the
3564      item to see if there is repeat information following. Then obey similar      item to see if there is repeat information following. Then obey similar
3565      code to character type repeats - written out again for speed. If caseless      code to character type repeats - written out again for speed. */
     matching was set at runtime but not at compile time, we have to check both  
     versions of a character, and we have to behave differently for positive and  
     negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are  
     treated differently. */  
3566    
3567      case OP_CLASS:      case OP_CLASS:
     case OP_NEGCLASS:  
3568        {        {
       BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless;  
3569        const uschar *data = ecode + 1;  /* Save for matching */        const uschar *data = ecode + 1;  /* Save for matching */
3570        ecode += 33;                     /* Advance past the item */        ecode += 33;                     /* Advance past the item */
3571    
# Line 2777  for (;;) Line 3604  for (;;)
3604          {          {
3605          if (eptr >= md->end_subject) return FALSE;          if (eptr >= md->end_subject) return FALSE;
3606          c = *eptr++;          c = *eptr++;
3607            if ((data[c/8] & (1 << (c&7))) != 0) continue;
         /* Either not runtime caseless, or it was a positive class. For  
         runtime caseless, continue if either case is in the map. */  
   
         if (!nasty_case)  
           {  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  
           if (md->runtime_caseless)  
             {  
             c = pcre_fcc[c];  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             }  
           }  
   
         /* Runtime caseless and it was a negative class. Continue only if  
         both cases are in the map. */  
   
         else  
           {  
           if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;  
           c = pcre_fcc[c];  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  
           }  
   
3608          return FALSE;          return FALSE;
3609          }          }
3610    
# Line 2816  for (;;) Line 3620  for (;;)
3620          {          {
3621          for (i = min;; i++)          for (i = min;; i++)
3622            {            {
3623            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3624                return TRUE;
3625            if (i >= max || eptr >= md->end_subject) return FALSE;            if (i >= max || eptr >= md->end_subject) return FALSE;
3626            c = *eptr++;            c = *eptr++;
3627              if ((data[c/8] & (1 << (c&7))) != 0) continue;
           /* Either not runtime caseless, or it was a positive class. For  
           runtime caseless, continue if either case is in the map. */  
   
           if (!nasty_case)  
             {  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             if (md->runtime_caseless)  
               {  
               c = pcre_fcc[c];  
               if ((data[c/8] & (1 << (c&7))) != 0) continue;  
               }  
             }  
   
           /* Runtime caseless and it was a negative class. Continue only if  
           both cases are in the map. */  
   
           else  
             {  
             if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;  
             c = pcre_fcc[c];  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             }  
   
3628            return FALSE;            return FALSE;
3629            }            }
3630          /* Control never gets here */          /* Control never gets here */
# Line 2857  for (;;) Line 3639  for (;;)
3639            {            {
3640            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3641            c = *eptr;            c = *eptr;
3642              if ((data[c/8] & (1 << (c&7))) != 0) continue;
           /* Either not runtime caseless, or it was a positive class. For  
           runtime caseless, continue if either case is in the map. */  
   
           if (!nasty_case)  
             {  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             if (md->runtime_caseless)  
               {  
               c = pcre_fcc[c];  
               if ((data[c/8] & (1 << (c&7))) != 0) continue;  
               }  
             }  
   
           /* Runtime caseless and it was a negative class. Continue only if  
           both cases are in the map. */  
   
           else  
             {  
             if ((data[c/8] & (1 << (c&7))) == 0) break;  
             c = pcre_fcc[c];  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             }  
   
3643            break;            break;
3644            }            }
3645    
3646          while (eptr >= pp)          while (eptr >= pp)
3647            if (match(eptr--, ecode, offset_top, md)) return TRUE;            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3648                return TRUE;
3649          return FALSE;          return FALSE;
3650          }          }
3651        }        }
# Line 2912  for (;;) Line 3672  for (;;)
3672  #endif  #endif
3673    
3674        if (length > md->end_subject - eptr) return FALSE;        if (length > md->end_subject - eptr) return FALSE;
3675        if (md->caseless)        if ((ims & PCRE_CASELESS) != 0)
3676          {          {
3677          while (length-- > 0) if (pcre_lcc[*ecode++] != pcre_lcc[*eptr++]) return FALSE;          while (length-- > 0)
3678              if (md->lcc[*ecode++] != md->lcc[*eptr++])
3679                return FALSE;
3680          }          }
3681        else        else
3682          {          {
# Line 2969  for (;;) Line 3731  for (;;)
3731      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
3732        max, eptr));        max, eptr));
3733    
3734      if (md->caseless)      if ((ims & PCRE_CASELESS) != 0)
3735        {        {
3736        c = pcre_lcc[c];        c = md->lcc[c];
3737        for (i = 1; i <= min; i++) if (c != pcre_lcc[*eptr++]) return FALSE;        for (i = 1; i <= min; i++)
3738            if (c != md->lcc[*eptr++]) return FALSE;
3739        if (min == max) continue;        if (min == max) continue;
3740        if (minimize)        if (minimize)
3741          {          {
3742          for (i = min;; i++)          for (i = min;; i++)
3743            {            {
3744            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3745            if (i >= max || eptr >= md->end_subject || c != pcre_lcc[*eptr++])              return TRUE;
3746              if (i >= max || eptr >= md->end_subject ||
3747                  c != md->lcc[*eptr++])
3748              return FALSE;              return FALSE;
3749            }            }
3750          /* Control never gets here */          /* Control never gets here */
# Line 2989  for (;;) Line 3754  for (;;)
3754          const uschar *pp = eptr;          const uschar *pp = eptr;
3755          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3756            {            {
3757            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c != md->lcc[*eptr]) break;
3758            eptr++;            eptr++;
3759            }            }
3760          while (eptr >= pp)          while (eptr >= pp)
3761            if (match(eptr--, ecode, offset_top, md)) return TRUE;            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3762                return TRUE;
3763          return FALSE;          return FALSE;
3764          }          }
3765        /* Control never gets here */        /* Control never gets here */
# Line 3009  for (;;) Line 3775  for (;;)
3775          {          {
3776          for (i = min;; i++)          for (i = min;; i++)
3777            {            {
3778            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3779                return TRUE;
3780            if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;            if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;
3781            }            }
3782          /* Control never gets here */          /* Control never gets here */
# Line 3023  for (;;) Line 3790  for (;;)
3790            eptr++;            eptr++;
3791            }            }
3792          while (eptr >= pp)          while (eptr >= pp)
3793           if (match(eptr--, ecode, offset_top, md)) return TRUE;           if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3794               return TRUE;
3795          return FALSE;          return FALSE;
3796          }          }
3797        }        }
# Line 3034  for (;;) Line 3802  for (;;)
3802      case OP_NOT:      case OP_NOT:
3803      if (eptr >= md->end_subject) return FALSE;      if (eptr >= md->end_subject) return FALSE;
3804      ecode++;      ecode++;
3805      if (md->caseless)      if ((ims & PCRE_CASELESS) != 0)
3806        {        {
3807        if (pcre_lcc[*ecode++] == pcre_lcc[*eptr++]) return FALSE;        if (md->lcc[*ecode++] == md->lcc[*eptr++]) return FALSE;
3808        }        }
3809      else      else
3810        {        {
# Line 3094  for (;;) Line 3862  for (;;)
3862      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
3863        max, eptr));        max, eptr));
3864    
3865      if (md->caseless)      if ((ims & PCRE_CASELESS) != 0)
3866        {        {
3867        c = pcre_lcc[c];        c = md->lcc[c];
3868        for (i = 1; i <= min; i++) if (c == pcre_lcc[*eptr++]) return FALSE;        for (i = 1; i <= min; i++)
3869            if (c == md->lcc[*eptr++]) return FALSE;
3870        if (min == max) continue;        if (min == max) continue;
3871        if (minimize)        if (minimize)
3872          {          {
3873          for (i = min;; i++)          for (i = min;; i++)
3874            {            {
3875            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3876            if (i >= max || eptr >= md->end_subject || c == pcre_lcc[*eptr++])              return TRUE;
3877              if (i >= max || eptr >= md->end_subject ||
3878                  c == md->lcc[*eptr++])
3879              return FALSE;              return FALSE;
3880            }            }
3881          /* Control never gets here */          /* Control never gets here */
# Line 3114  for (;;) Line 3885  for (;;)
3885          const uschar *pp = eptr;          const uschar *pp = eptr;
3886          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3887            {            {
3888            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c == md->lcc[*eptr]) break;
3889            eptr++;            eptr++;
3890            }            }
3891          while (eptr >= pp)          while (eptr >= pp)
3892            if (match(eptr--, ecode, offset_top, md)) return TRUE;            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3893                return TRUE;
3894          return FALSE;          return FALSE;
3895          }          }
3896        /* Control never gets here */        /* Control never gets here */
# Line 3134  for (;;) Line 3906  for (;;)
3906          {          {
3907          for (i = min;; i++)          for (i = min;; i++)
3908            {            {
3909            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3910                return TRUE;
3911            if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;            if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;
3912            }            }
3913          /* Control never gets here */          /* Control never gets here */
# Line 3148  for (;;) Line 3921  for (;;)
3921            eptr++;            eptr++;
3922            }            }
3923          while (eptr >= pp)          while (eptr >= pp)
3924           if (match(eptr--, ecode, offset_top, md)) return TRUE;           if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3925               return TRUE;
3926          return FALSE;          return FALSE;
3927          }          }
3928        }        }
# Line 3198  for (;;) Line 3972  for (;;)
3972      if (min > 0) switch(ctype)      if (min > 0) switch(ctype)
3973        {        {
3974        case OP_ANY:        case OP_ANY:
3975        if (!md->dotall)        if ((ims & PCRE_DOTALL) == 0)
3976          { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }          { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }
3977        else eptr += min;        else eptr += min;
3978        break;        break;
3979    
3980        case OP_NOT_DIGIT:        case OP_NOT_DIGIT:
3981        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3982          if ((pcre_ctypes[*eptr++] & ctype_digit) != 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_digit) != 0) return FALSE;
3983        break;        break;
3984    
3985        case OP_DIGIT:        case OP_DIGIT:
3986        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3987          if ((pcre_ctypes[*eptr++] & ctype_digit) == 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_digit) == 0) return FALSE;
3988        break;        break;
3989    
3990        case OP_NOT_WHITESPACE:        case OP_NOT_WHITESPACE:
3991        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3992          if ((pcre_ctypes[*eptr++] & ctype_space) != 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_space) != 0) return FALSE;
3993        break;        break;
3994    
3995        case OP_WHITESPACE:        case OP_WHITESPACE:
3996        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3997          if ((pcre_ctypes[*eptr++] & ctype_space) == 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_space) == 0) return FALSE;
3998        break;        break;
3999    
4000        case OP_NOT_WORDCHAR:        case OP_NOT_WORDCHAR:
4001        for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) != 0)        for (i = 1; i <= min; i++)
4002          return FALSE;          if ((md->ctypes[*eptr++] & ctype_word) != 0)
4003              return FALSE;
4004        break;        break;
4005    
4006        case OP_WORDCHAR:        case OP_WORDCHAR:
4007        for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) == 0)        for (i = 1; i <= min; i++)
4008          return FALSE;          if ((md->ctypes[*eptr++] & ctype_word) == 0)
4009              return FALSE;
4010        break;        break;
4011        }        }
4012    
# Line 3239  for (;;) Line 4015  for (;;)
4015      if (min == max) continue;      if (min == max) continue;
4016    
4017      /* If minimizing, we have to test the rest of the pattern before each      /* If minimizing, we have to test the rest of the pattern before each
4018      subsequent match, so inlining isn't much help; just use the function. */      subsequent match. */
4019    
4020      if (minimize)      if (minimize)
4021        {        {
4022        for (i = min;; i++)        for (i = min;; i++)
4023          {          {
4024          if (match(eptr, ecode, offset_top, md)) return TRUE;          if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE;
4025          if (i >= max || eptr >= md->end_subject ||          if (i >= max || eptr >= md->end_subject) return FALSE;
4026            !match_type(ctype, *eptr++, md->dotall))  
4027              return FALSE;          c = *eptr++;
4028            switch(ctype)
4029              {
4030              case OP_ANY:
4031              if ((ims & PCRE_DOTALL) == 0 && c == '\n') return FALSE;
4032              break;
4033    
4034              case OP_NOT_DIGIT:
4035              if ((md->ctypes[c] & ctype_digit) != 0) return FALSE;
4036              break;
4037    
4038              case OP_DIGIT:
4039              if ((md->ctypes[c] & ctype_digit) == 0) return FALSE;
4040              break;
4041    
4042              case OP_NOT_WHITESPACE:
4043              if ((md->ctypes[c] & ctype_space) != 0) return FALSE;
4044              break;
4045    
4046              case OP_WHITESPACE:
4047              if  ((md->ctypes[c] & ctype_space) == 0) return FALSE;
4048              break;
4049    
4050              case OP_NOT_WORDCHAR:
4051              if ((md->ctypes[c] & ctype_word) != 0) return FALSE;
4052              break;
4053    
4054              case OP_WORDCHAR:
4055              if ((md->ctypes[c] & ctype_word) == 0) return FALSE;
4056              break;
4057              }
4058          }          }
4059        /* Control never gets here */        /* Control never gets here */
4060        }        }
# Line 3262  for (;;) Line 4068  for (;;)
4068        switch(ctype)        switch(ctype)
4069          {          {
4070          case OP_ANY:          case OP_ANY:
4071          if (!md->dotall)          if ((ims & PCRE_DOTALL) == 0)
4072            {            {
4073            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4074              {              {
# Line 3281  for (;;) Line 4087  for (;;)
4087          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
4088          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4089            {            {
4090            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
4091              break;              break;
4092            eptr++;            eptr++;
4093            }            }
# Line 3290  for (;;) Line 4096  for (;;)
4096          case OP_DIGIT:          case OP_DIGIT:
4097          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4098            {            {
4099            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
4100              break;              break;
4101            eptr++;            eptr++;
4102            }            }
# Line 3299  for (;;) Line 4105  for (;;)
4105          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
4106          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4107            {            {
4108            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
4109              break;              break;
4110            eptr++;            eptr++;
4111            }            }
# Line 3308  for (;;) Line 4114  for (;;)
4114          case OP_WHITESPACE:          case OP_WHITESPACE:
4115          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4116            {            {
4117            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
4118              break;              break;
4119            eptr++;            eptr++;
4120            }            }
# Line 3317  for (;;) Line 4123  for (;;)
4123          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
4124          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4125            {            {
4126            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
4127              break;              break;
4128            eptr++;            eptr++;
4129            }            }
# Line 3326  for (;;) Line 4132  for (;;)
4132          case OP_WORDCHAR:          case OP_WORDCHAR:
4133          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4134            {            {
4135            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
4136              break;              break;
4137            eptr++;            eptr++;
4138            }            }
# Line 3334  for (;;) Line 4140  for (;;)
4140          }          }
4141    
4142        while (eptr >= pp)        while (eptr >= pp)
4143          if (match(eptr--, ecode, offset_top, md)) return TRUE;          if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
4144              return TRUE;
4145        return FALSE;        return FALSE;
4146        }        }
4147      /* Control never gets here */      /* Control never gets here */
# Line 3357  for (;;) Line 4164  for (;;)
4164    
4165    
4166    
 /*************************************************  
 *         Segregate setjmp()                     *  
 *************************************************/  
   
 /* The -Wall option of gcc gives warnings for all local variables when setjmp()  
 is used, even if the coding conforms to the rules of ANSI C. To avoid this, we  
 hide it in a separate function. This is called only when PCRE_EXTRA is set,  
 since it's needed only for the extension \X option, and with any luck, a good  
 compiler will spot the tail recursion and compile it efficiently.  
   
 Arguments:  
    eptr        pointer in subject  
    ecode       position in code  
    offset_top  current top pointer  
    md          pointer to "static" info for the match  
   
 Returns:       TRUE if matched  
 */  
   
 static BOOL  
 match_with_setjmp(const uschar *eptr, const uschar *ecode, int offset_top,  
   match_data *match_block)  
 {  
 return setjmp(match_block->fail_env) == 0 &&  
       match(eptr, ecode, offset_top, match_block);  
 }  
   
   
4167    
4168  /*************************************************  /*************************************************
4169  *         Execute a Regular Expression           *  *         Execute a Regular Expression           *
# Line 3399  Arguments: Line 4178  Arguments:
4178    external_extra  points to "hints" from pcre_study() or is NULL    external_extra  points to "hints" from pcre_study() or is NULL
4179    subject         points to the subject string    subject         points to the subject string
4180    length          length of subject string (may contain binary zeros)    length          length of subject string (may contain binary zeros)
4181      start_offset    where to start in the subject string
4182    options         option bits    options         option bits
4183    offsets         points to a vector of ints to be filled in with offsets    offsets         points to a vector of ints to be filled in with offsets
4184    offsetcount     the number of elements in the vector    offsetcount     the number of elements in the vector
# Line 3411  Returns:          > 0 => success; value Line 4191  Returns:          > 0 => success; value
4191    
4192  int  int
4193  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
4194    const char *subject, int length, int options, int *offsets, int offsetcount)    const char *subject, int length, int start_offset, int options, int *offsets,
4195      int offsetcount)
4196  {  {
4197  int resetcount, ocount;  int resetcount, ocount;
4198  int first_char = -1;  int first_char = -1;
4199    int req_char = -1;
4200    int req_char2 = -1;
4201    unsigned long int ims = 0;
4202  match_data match_block;  match_data match_block;
4203  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
4204  const uschar *start_match = (const uschar *)subject;  const uschar *start_match = (const uschar *)subject + start_offset;
4205  const uschar *end_subject;  const uschar *end_subject;
4206    const uschar *req_char_ptr = start_match - 1;
4207  const real_pcre *re = (const real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
4208  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
4209  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
# Line 3435  match_block.start_subject = (const uscha Line 4220  match_block.start_subject = (const uscha
4220  match_block.end_subject = match_block.start_subject + length;  match_block.end_subject = match_block.start_subject + length;
4221  end_subject = match_block.end_subject;  end_subject = match_block.end_subject;
4222    
4223  match_block.caseless  = ((re->options | options) & PCRE_CASELESS) != 0;  match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
 match_block.runtime_caseless = match_block.caseless &&  
   (re->options & PCRE_CASELESS) == 0;  
   
 match_block.multiline = ((re->options | options) & PCRE_MULTILINE) != 0;  
 match_block.dotall    = ((re->options | options) & PCRE_DOTALL) != 0;  
 match_block.endonly   = ((re->options | options) & PCRE_DOLLAR_ENDONLY) != 0;  
4224    
4225  match_block.notbol = (options & PCRE_NOTBOL) != 0;  match_block.notbol = (options & PCRE_NOTBOL) != 0;
4226  match_block.noteol = (options & PCRE_NOTEOL) != 0;  match_block.noteol = (options & PCRE_NOTEOL) != 0;
4227    match_block.notempty = (options & PCRE_NOTEMPTY) != 0;
4228    
4229  match_block.errorcode = PCRE_ERROR_NOMATCH;     /* Default error */  match_block.errorcode = PCRE_ERROR_NOMATCH;     /* Default error */
4230    
4231    match_block.lcc = re->tables + lcc_offset;
4232    match_block.ctypes = re->tables + ctypes_offset;