/[pcre]/code/trunk/pcre.c
ViewVC logotype

Diff of /code/trunk/pcre.c

Parent Directory | Revision Log | View Patch

revision 11 by nigel, Sat Feb 24 21:38:17 2007 UTC revision 33 by nigel, Sat Feb 24 21:39:01 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1997 University of Cambridge             Copyright (c) 1997-1999 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 25  restrictions: Line 25  restrictions:
25    
26  3. Altered versions must be plainly marked as such, and must not be  3. Altered versions must be plainly marked as such, and must not be
27     misrepresented as being the original software.     misrepresented as being the original software.
28    
29    4. If PCRE is embedded in any software that is released under the GNU
30       General Purpose Licence (GPL), then the terms of that licence shall
31       supersede any condition above with which it is incompatible.
32  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
33  */  */
34    
# Line 33  restrictions: Line 37  restrictions:
37    
38  /* #define DEBUG */  /* #define DEBUG */
39    
40  /* Use a macro for debugging printing, 'cause that eliminates the the use  /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
41  of #ifdef inline, and there are *still* stupid compilers about that don't like  inline, and there are *still* stupid compilers about that don't like indented
42  indented pre-processor statements. I suppose it's only been 10 years... */  pre-processor statements. I suppose it's only been 10 years... */
43    
44  #ifdef DEBUG  #ifdef DEBUG
45  #define DPRINTF(p) printf p  #define DPRINTF(p) printf p
# Line 49  the external pcre header. */ Line 53  the external pcre header. */
53  #include "internal.h"  #include "internal.h"
54    
55    
56    /* Allow compilation as C++ source code, should anybody want to do that. */
57    
58    #ifdef __cplusplus
59    #define class pcre_class
60    #endif
61    
62    
63    /* Number of items on the nested bracket stacks at compile time. This should
64    not be set greater than 200. */
65    
66    #define BRASTACK_SIZE 200
67    
68    
69  /* Min and max values for the common repeats; for the maxima, 0 => infinity */  /* Min and max values for the common repeats; for the maxima, 0 => infinity */
70    
71  static char rep_min[] = { 0, 0, 1, 1, 0, 0 };  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
72  static char rep_max[] = { 0, 0, 0, 0, 1, 1 };  static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
73    
74  /* Text forms of OP_ values and things, for debugging */  /* Text forms of OP_ values and things, for debugging (not all used) */
75    
76  #ifdef DEBUG  #ifdef DEBUG
77  static const char *OP_names[] = {  static const char *OP_names[] = {
78    "End", "\\A", "\\B", "\\b", "\\D", "\\d",    "End", "\\A", "\\B", "\\b", "\\D", "\\d",
79    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
80    "not",    "Opt", "^", "$", "Any", "chars", "not",
81    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
82    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
83    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
84    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
85    "class", "Ref",    "class", "Ref",
86    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
87      "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
88    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
89  };  };
90  #endif  #endif
# Line 76  are simple data values; negative values Line 94  are simple data values; negative values
94  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
95  is invalid. */  is invalid. */
96    
97  static short int escapes[] = {  static const short int escapes[] = {
98      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
99      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
100    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */
# Line 86  static short int escapes[] = { Line 104  static short int escapes[] = {
104    '`',      7, -ESC_b,      0, -ESC_d,     27,   '\f',      0,   /* ` - g */    '`',      7, -ESC_b,      0, -ESC_d,     27,   '\f',      0,   /* ` - g */
105      0,      0,      0,      0,      0,      0,   '\n',      0,   /* h - o */      0,      0,      0,      0,      0,      0,   '\n',      0,   /* h - o */
106      0,      0,   '\r', -ESC_s,   '\t',      0,      0, -ESC_w,   /* p - w */      0,      0,   '\r', -ESC_s,   '\t',      0,      0, -ESC_w,   /* p - w */
107      0,      0,      0                                            /* x - z */      0,      0, -ESC_z                                            /* x - z */
108  };  };
109    
110  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
111    
112  static BOOL compile_regex(int, int *, uschar **, const uschar **, const char **);  static BOOL
113      compile_regex(int, int, int *, uschar **, const uschar **, const char **,
114  /* Structure for passing "static" information around between the functions      BOOL, int, compile_data *);
 doing the matching, so that they are thread-safe. */  
   
 typedef struct match_data {  
   int    errorcode;             /* As it says */  
   int   *offset_vector;         /* Offset vector */  
   int    offset_end;            /* One past the end */  
   BOOL   offset_overflow;       /* Set if too many extractions */  
   BOOL   caseless;              /* Case-independent flag */  
   BOOL   runtime_caseless;      /* Caseless forced at run time */  
   BOOL   multiline;             /* Multiline flag */  
   BOOL   notbol;                /* NOTBOL flag */  
   BOOL   noteol;                /* NOTEOL flag */  
   BOOL   dotall;                /* Dot matches any char */  
   BOOL   endonly;               /* Dollar not before final \n */  
   const uschar *start_subject;  /* Start of the subject string */  
   const uschar *end_subject;    /* End of the subject string */  
   jmp_buf fail_env;             /* Environment for longjump() break out */  
   const uschar *end_match_ptr;  /* Subject position at end match */  
   int     end_offset_top;       /* Highwater mark at end of match */  
 } match_data;  
115    
116    
117    
# Line 133  void  (*pcre_free)(void *) = free; Line 131  void  (*pcre_free)(void *) = free;
131    
132    
133  /*************************************************  /*************************************************
134    *             Default character tables           *
135    *************************************************/
136    
137    /* A default set of character tables is included in the PCRE binary. Its source
138    is built by the maketables auxiliary program, which uses the default C ctypes
139    functions, and put in the file chartables.c. These tables are used by PCRE
140    whenever the caller of pcre_compile() does not provide an alternate set of
141    tables. */
142    
143    #include "chartables.c"
144    
145    
146    
147    /*************************************************
148  *          Return version string                 *  *          Return version string                 *
149  *************************************************/  *************************************************/
150    
# Line 210  while (length-- > 0) Line 222  while (length-- > 0)
222    
223    
224  /*************************************************  /*************************************************
 *         Check subpattern for empty operand     *  
 *************************************************/  
   
 /* This function checks a bracketed subpattern to see if any of the paths  
 through it could match an empty string. This is used to diagnose an error if  
 such a subpattern is followed by a quantifier with an unlimited upper bound.  
   
 Argument:  
   code      points to the opening bracket  
   
 Returns:    TRUE or FALSE  
 */  
   
 static BOOL  
 could_be_empty(uschar *code)  
 {  
 do {  
   uschar *cc = code + 3;  
   
   /* Scan along the opcodes for this branch; as soon as we find something  
   that matches a non-empty string, break out and advance to test the next  
   branch. If we get to the end of the branch, return TRUE for the whole  
   sub-expression. */  
   
   for (;;)  
     {  
     /* Test an embedded subpattern; if it could not be empty, break the  
     loop. Otherwise carry on in the branch. */  
   
     if ((int)(*cc) >= OP_BRA || (int)(*cc) == OP_ONCE)  
       {  
       if (!could_be_empty(cc)) break;  
       do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);  
       cc += 3;  
       }  
   
     else switch (*cc)  
       {  
       /* Reached end of a branch: the subpattern may match the empty string */  
   
       case OP_ALT:  
       case OP_KET:  
       case OP_KETRMAX:  
       case OP_KETRMIN:  
       return TRUE;  
   
       /* Skip over assertive subpatterns */  
   
       case OP_ASSERT:  
       case OP_ASSERT_NOT:  
       do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);  
       cc += 3;  
       break;  
   
       /* Skip over things that don't match chars */  
   
       case OP_SOD:  
       case OP_EOD:  
       case OP_CIRC:  
       case OP_DOLL:  
       case OP_BRAZERO:  
       case OP_BRAMINZERO:  
       case OP_NOT_WORD_BOUNDARY:  
       case OP_WORD_BOUNDARY:  
       cc++;  
       break;  
   
       /* Skip over simple repeats with zero lower bound */  
   
       case OP_STAR:  
       case OP_MINSTAR:  
       case OP_QUERY:  
       case OP_MINQUERY:  
       case OP_NOTSTAR:  
       case OP_NOTMINSTAR:  
       case OP_NOTQUERY:  
       case OP_NOTMINQUERY:  
       case OP_TYPESTAR:  
       case OP_TYPEMINSTAR:  
       case OP_TYPEQUERY:  
       case OP_TYPEMINQUERY:  
       cc += 2;  
       break;  
   
       /* Skip over UPTOs (lower bound is zero) */  
   
       case OP_UPTO:  
       case OP_MINUPTO:  
       case OP_TYPEUPTO:  
       case OP_TYPEMINUPTO:  
       cc += 4;  
       break;  
   
       /* Check a class or a back reference for a zero minimum */  
   
       case OP_CLASS:  
       case OP_REF:  
       cc += (*cc == OP_REF)? 2 : 33;  
   
       switch (*cc)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         cc++;  
         break;  
   
         case OP_CRRANGE:  
         case OP_CRMINRANGE:  
         if ((cc[1] << 8) + cc[2] != 0) goto NEXT_BRANCH;  
         cc += 3;  
         break;  
   
         default:  
         goto NEXT_BRANCH;  
         }  
       break;  
   
       /* Anything else matches at least one character */  
   
       default:  
       goto NEXT_BRANCH;  
       }  
     }  
   
   NEXT_BRANCH:  
   code += (code[1] << 8) + code[2];  
   }  
 while (*code == OP_ALT);  
   
 /* No branches match the empty string */  
   
 return FALSE;  
 }  
   
   
   
 /*************************************************  
225  *            Handle escapes                      *  *            Handle escapes                      *
226  *************************************************/  *************************************************/
227    
# Line 364  Arguments: Line 237  Arguments:
237    bracount   number of previous extracting brackets    bracount   number of previous extracting brackets
238    options    the options bits    options    the options bits
239    isclass    TRUE if inside a character class    isclass    TRUE if inside a character class
240      cd         pointer to char tables block
241    
242  Returns:     zero or positive => a data character  Returns:     zero or positive => a data character
243               negative => a special escape sequence               negative => a special escape sequence
# Line 372  Returns:     zero or positive => a data Line 246  Returns:     zero or positive => a data
246    
247  static int  static int
248  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
249    int options, BOOL isclass)    int options, BOOL isclass, compile_data *cd)
250  {  {
251  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
252  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */
# Line 415  else Line 289  else
289        {        {
290        oldptr = ptr;        oldptr = ptr;
291        c -= '0';        c -= '0';
292        while ((pcre_ctypes[ptr[1]] & ctype_digit) != 0)        while ((cd->ctypes[ptr[1]] & ctype_digit) != 0)
293          c = c * 10 + *(++ptr) - '0';          c = c * 10 + *(++ptr) - '0';
294        if (c < 10 || c <= bracount)        if (c < 10 || c <= bracount)
295          {          {
# Line 441  else Line 315  else
315    
316      case '0':      case '0':
317      c -= '0';      c -= '0';
318      while(i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_digit) != 0 &&      while(i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 &&
319        ptr[1] != '8' && ptr[1] != '9')        ptr[1] != '8' && ptr[1] != '9')
320          c = c * 8 + *(++ptr) - '0';          c = c * 8 + *(++ptr) - '0';
321      break;      break;
# Line 450  else Line 324  else
324    
325      case 'x':      case 'x':
326      c = 0;      c = 0;
327      while (i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_xdigit) != 0)      while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0)
328        {        {
329        ptr++;        ptr++;
330        c = c * 16 + pcre_lcc[*ptr] -        c = c * 16 + cd->lcc[*ptr] -
331          (((pcre_ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');          (((cd->ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');
332        }        }
333      break;      break;
334    
# Line 468  else Line 342  else
342    
343      /* A letter is upper-cased; then the 0x40 bit is flipped */      /* A letter is upper-cased; then the 0x40 bit is flipped */
344    
345      if (c >= 'a' && c <= 'z') c = pcre_fcc[c];      if (c >= 'a' && c <= 'z') c = cd->fcc[c];
346      c ^= 0x40;      c ^= 0x40;
347      break;      break;
348    
349      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
350      other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,      other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,
351      for Perl compatibility, it is a literal. */      for Perl compatibility, it is a literal. This code looks a bit odd, but
352        there used to be some cases other than the default, and there may be again
353        in future, so I haven't "optimized" it. */
354    
355      default:      default:
356      if ((options & PCRE_EXTRA) != 0) switch(c)      if ((options & PCRE_EXTRA) != 0) switch(c)
357        {        {
       case 'X':  
       c = -ESC_X;      /* This could be a lookup if it ever got into Perl */  
       break;  
   
358        default:        default:
359        *errorptr = ERR3;        *errorptr = ERR3;
360        break;        break;
# Line 508  where the ddds are digits. Line 380  where the ddds are digits.
380    
381  Arguments:  Arguments:
382    p         pointer to the first char after '{'    p         pointer to the first char after '{'
383      cd        pointer to char tables block
384    
385  Returns:    TRUE or FALSE  Returns:    TRUE or FALSE
386  */  */
387    
388  static BOOL  static BOOL
389  is_counted_repeat(const uschar *p)  is_counted_repeat(const uschar *p, compile_data *cd)
390  {  {
391  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
392  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
393  if (*p == '}') return TRUE;  if (*p == '}') return TRUE;
394    
395  if (*p++ != ',') return FALSE;  if (*p++ != ',') return FALSE;
396  if (*p == '}') return TRUE;  if (*p == '}') return TRUE;
397    
398  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
399  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
400  return (*p == '}');  return (*p == '}');
401  }  }
402    
# Line 543  Arguments: Line 416  Arguments:
416    maxp       pointer to int for max    maxp       pointer to int for max
417               returned as -1 if no max               returned as -1 if no max
418    errorptr   points to pointer to error message    errorptr   points to pointer to error message
419      cd         pointer to character tables block
420    
421  Returns:     pointer to '}' on success;  Returns:     pointer to '}' on success;
422               current ptr on error, with errorptr set               current ptr on error, with errorptr set
423  */  */
424    
425  static const uschar *  static const uschar *
426  read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr)  read_repeat_counts(const uschar *p, int *minp, int *maxp,
427      const char **errorptr, compile_data *cd)
428  {  {
429  int min = 0;  int min = 0;
430  int max = -1;  int max = -1;
431    
432  while ((pcre_ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';  while ((cd->ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';
433    
434  if (*p == '}') max = min; else  if (*p == '}') max = min; else
435    {    {
436    if (*(++p) != '}')    if (*(++p) != '}')
437      {      {
438      max = 0;      max = 0;
439      while((pcre_ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';      while((cd->ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';
440      if (max < min)      if (max < min)
441        {        {
442        *errorptr = ERR4;        *errorptr = ERR4;
# Line 586  return p; Line 461  return p;
461    
462    
463  /*************************************************  /*************************************************
464    *        Find the fixed length of a pattern      *
465    *************************************************/
466    
467    /* Scan a pattern and compute the fixed length of subject that will match it,
468    if the length is fixed. This is needed for dealing with backward assertions.
469    
470    Arguments:
471      code     points to the start of the pattern (the bracket)
472    
473    Returns:   the fixed length, or -1 if there is no fixed length
474    */
475    
476    static int
477    find_fixedlength(uschar *code)
478    {
479    int length = -1;
480    
481    register int branchlength = 0;
482    register uschar *cc = code + 3;
483    
484    /* Scan along the opcodes for this branch. If we get to the end of the
485    branch, check the length against that of the other branches. */
486    
487    for (;;)
488      {
489      int d;
490      register int op = *cc;
491      if (op >= OP_BRA) op = OP_BRA;
492    
493      switch (op)
494        {
495        case OP_BRA:
496        case OP_ONCE:
497        case OP_COND:
498        d = find_fixedlength(cc);
499        if (d < 0) return -1;
500        branchlength += d;
501        do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
502        cc += 3;
503        break;
504    
505        /* Reached end of a branch; if it's a ket it is the end of a nested
506        call. If it's ALT it is an alternation in a nested call. If it is
507        END it's the end of the outer call. All can be handled by the same code. */
508    
509        case OP_ALT:
510        case OP_KET:
511        case OP_KETRMAX:
512        case OP_KETRMIN:
513        case OP_END:
514        if (length < 0) length = branchlength;
515          else if (length != branchlength) return -1;
516        if (*cc != OP_ALT) return length;
517        cc += 3;
518        branchlength = 0;
519        break;
520    
521        /* Skip over assertive subpatterns */
522    
523        case OP_ASSERT:
524        case OP_ASSERT_NOT:
525        case OP_ASSERTBACK:
526        case OP_ASSERTBACK_NOT:
527        do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
528        cc += 3;
529        break;
530    
531        /* Skip over things that don't match chars */
532    
533        case OP_REVERSE:
534        cc++;
535    
536        case OP_CREF:
537        case OP_OPT:
538        cc++;
539        /* Fall through */
540    
541        case OP_SOD:
542        case OP_EOD:
543        case OP_EODN:
544        case OP_CIRC:
545        case OP_DOLL:
546        case OP_NOT_WORD_BOUNDARY:
547        case OP_WORD_BOUNDARY:
548        cc++;
549        break;
550    
551        /* Handle char strings */
552    
553        case OP_CHARS:
554        branchlength += *(++cc);
555        cc += *cc + 1;
556        break;
557    
558        /* Handle exact repetitions */
559    
560        case OP_EXACT:
561        case OP_TYPEEXACT:
562        branchlength += (cc[1] << 8) + cc[2];
563        cc += 4;
564        break;
565    
566        /* Handle single-char matchers */
567    
568        case OP_NOT_DIGIT:
569        case OP_DIGIT:
570        case OP_NOT_WHITESPACE:
571        case OP_WHITESPACE:
572        case OP_NOT_WORDCHAR:
573        case OP_WORDCHAR:
574        case OP_ANY:
575        branchlength++;
576        cc++;
577        break;
578    
579    
580        /* Check a class for variable quantification */
581    
582        case OP_CLASS:
583        cc += (*cc == OP_REF)? 2 : 33;
584    
585        switch (*cc)
586          {
587          case OP_CRSTAR:
588          case OP_CRMINSTAR:
589          case OP_CRQUERY:
590          case OP_CRMINQUERY:
591          return -1;
592    
593          case OP_CRRANGE:
594          case OP_CRMINRANGE:
595          if ((cc[1] << 8) + cc[2] != (cc[3] << 8) + cc[4]) return -1;
596          branchlength += (cc[1] << 8) + cc[2];
597          cc += 5;
598          break;
599    
600          default:
601          branchlength++;
602          }
603        break;
604    
605        /* Anything else is variable length */
606    
607        default:
608        return -1;
609        }
610      }
611    /* Control never gets here */
612    }
613    
614    
615    
616    
617    /*************************************************
618  *           Compile one branch                   *  *           Compile one branch                   *
619  *************************************************/  *************************************************/
620    
621  /* Scan the pattern, compiling it into the code vector.  /* Scan the pattern, compiling it into the code vector.
622    
623  Arguments:  Arguments:
624    options    the option bits    options      the option bits
625    bracket    points to number of brackets used    brackets     points to number of brackets used
626    code       points to the pointer to the current code point    code         points to the pointer to the current code point
627    ptrptr     points to the current pattern pointer    ptrptr       points to the current pattern pointer
628    errorptr   points to pointer to error message    errorptr     points to pointer to error message
629      optchanged   set to the value of the last OP_OPT item compiled
630      cd           contains pointers to tables
631    
632  Returns:     TRUE on success  Returns:       TRUE on success
633               FALSE, with *errorptr set on error                 FALSE, with *errorptr set on error
634  */  */
635    
636  static BOOL  static BOOL
637  compile_branch(int options, int *brackets, uschar **codeptr,  compile_branch(int options, int *brackets, uschar **codeptr,
638    const uschar **ptrptr, const char **errorptr)    const uschar **ptrptr, const char **errorptr, int *optchanged,
639      compile_data *cd)
640  {  {
641  int repeat_type, op_type;  int repeat_type, op_type;
642  int repeat_min, repeat_max;  int repeat_min, repeat_max;
643  int bravalue, length;  int bravalue, length;
644    int greedy_default, greedy_non_default;
645  register int c;  register int c;
646  register uschar *code = *codeptr;  register uschar *code = *codeptr;
647    uschar *tempcode;
648  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
649  const uschar *oldptr;  const uschar *tempptr;
650  uschar *previous = NULL;  uschar *previous = NULL;
651  uschar class[32];  uschar class[32];
652    
653    /* Set up the default and non-default settings for greediness */
654    
655    greedy_default = ((options & PCRE_UNGREEDY) != 0);
656    greedy_non_default = greedy_default ^ 1;
657    
658  /* Switch on next character until the end of the branch */  /* Switch on next character until the end of the branch */
659    
660  for (;; ptr++)  for (;; ptr++)
661    {    {
662    BOOL negate_class;    BOOL negate_class;
663    int  class_charcount;    int class_charcount;
664    int  class_lastchar;    int class_lastchar;
665      int newoptions;
666      int condref;
667    
668    c = *ptr;    c = *ptr;
669    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
670      {      {
671      if ((pcre_ctypes[c] & ctype_space) != 0) continue;      if ((cd->ctypes[c] & ctype_space) != 0) continue;
672      if (c == '#')      if (c == '#')
673        {        {
674        while ((c = *(++ptr)) != 0 && c != '\n');        while ((c = *(++ptr)) != 0 && c != '\n');
# Line 672  for (;; ptr++) Line 713  for (;; ptr++)
713      previous = code;      previous = code;
714      *code++ = OP_CLASS;      *code++ = OP_CLASS;
715    
716      /* If the first character is '^', set the negation flag */      /* If the first character is '^', set the negation flag and skip it. */
717    
718      if ((c = *(++ptr)) == '^')      if ((c = *(++ptr)) == '^')
719        {        {
# Line 715  for (;; ptr++) Line 756  for (;; ptr++)
756    
757        if (c == '\\')        if (c == '\\')
758          {          {
759          c = check_escape(&ptr, errorptr, *brackets, options, TRUE);          c = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
760          if (-c == ESC_b) c = '\b';          if (-c == ESC_b) c = '\b';
761          else if (c < 0)          else if (c < 0)
762            {            {
763              register const uschar *cbits = cd->cbits;
764            class_charcount = 10;            class_charcount = 10;
765            switch (-c)            switch (-c)
766              {              {
767              case ESC_d:              case ESC_d:
768              for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_digit];              for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_digit];
769              continue;              continue;
770    
771              case ESC_D:              case ESC_D:
772              for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_digit];              for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_digit];
773              continue;              continue;
774    
775              case ESC_w:              case ESC_w:
776              for (c = 0; c < 32; c++)              for (c = 0; c < 32; c++)
777                class[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]);                class[c] |= (cbits[c+cbit_digit] | cbits[c+cbit_word]);
778              continue;              continue;
779    
780              case ESC_W:              case ESC_W:
781              for (c = 0; c < 32; c++)              for (c = 0; c < 32; c++)
782                class[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]);                class[c] |= ~(cbits[c+cbit_digit] | cbits[c+cbit_word]);
783              continue;              continue;
784    
785              case ESC_s:              case ESC_s:
786              for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_space];              for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_space];
787              continue;              continue;
788    
789              case ESC_S:              case ESC_S:
790              for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_space];              for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_space];
791              continue;              continue;
792    
793              default:              default:
# Line 777  for (;; ptr++) Line 819  for (;; ptr++)
819    
820          if (d == '\\')          if (d == '\\')
821            {            {
822            d = check_escape(&ptr, errorptr, *brackets, options, TRUE);            d = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
823            if (d < 0)            if (d < 0)
824              {              {
825              if (d == -ESC_b) d = '\b'; else              if (d == -ESC_b) d = '\b'; else
# Line 799  for (;; ptr++) Line 841  for (;; ptr++)
841            class[c/8] |= (1 << (c&7));            class[c/8] |= (1 << (c&7));
842            if ((options & PCRE_CASELESS) != 0)            if ((options & PCRE_CASELESS) != 0)
843              {              {
844              int uc = pcre_fcc[c];           /* flip case */              int uc = cd->fcc[c];           /* flip case */
845              class[uc/8] |= (1 << (uc&7));              class[uc/8] |= (1 << (uc&7));
846              }              }
847            class_charcount++;                /* in case a one-char range */            class_charcount++;                /* in case a one-char range */
# Line 814  for (;; ptr++) Line 856  for (;; ptr++)
856        class [c/8] |= (1 << (c&7));        class [c/8] |= (1 << (c&7));
857        if ((options & PCRE_CASELESS) != 0)        if ((options & PCRE_CASELESS) != 0)
858          {          {
859          c = pcre_fcc[c];   /* flip case */          c = cd->fcc[c];   /* flip case */
860          class[c/8] |= (1 << (c&7));          class[c/8] |= (1 << (c&7));
861          }          }
862        class_charcount++;        class_charcount++;
# Line 861  for (;; ptr++) Line 903  for (;; ptr++)
903      /* Various kinds of repeat */      /* Various kinds of repeat */
904    
905      case '{':      case '{':
906      if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;      if (!is_counted_repeat(ptr+1, cd)) goto NORMAL_CHAR;
907      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr);      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr, cd);
908      if (*errorptr != NULL) goto FAILED;      if (*errorptr != NULL) goto FAILED;
909      goto REPEAT;      goto REPEAT;
910    
# Line 887  for (;; ptr++) Line 929  for (;; ptr++)
929        goto FAILED;        goto FAILED;
930        }        }
931    
932      /* If the next character is '?' this is a minimizing repeat. Advance to the      /* If the next character is '?' this is a minimizing repeat, by default,
933        but if PCRE_UNGREEDY is set, it works the other way round. Advance to the
934      next character. */      next character. */
935    
936      if (ptr[1] == '?') { repeat_type = 1; ptr++; } else repeat_type = 0;      if (ptr[1] == '?')
937          { repeat_type = greedy_non_default; ptr++; }
938        else repeat_type = greedy_default;
939    
940      /* If the maximum is zero then the minimum must also be zero; Perl allows      /* If the maximum is zero then the minimum must also be zero; Perl allows
941      this case, so we do too - by simply omitting the item altogether. */      this case, so we do too - by simply omitting the item altogether. */
# Line 935  for (;; ptr++) Line 980  for (;; ptr++)
980      create a suitable repeat item. The code is shared with single-character      create a suitable repeat item. The code is shared with single-character
981      repeats by adding a suitable offset into repeat_type. */      repeats by adding a suitable offset into repeat_type. */
982    
983      else if ((int)*previous < OP_EOD || *previous == OP_ANY)      else if ((int)*previous < OP_EODN || *previous == OP_ANY)
984        {        {
985        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */
986        c = *previous;        c = *previous;
# Line 979  for (;; ptr++) Line 1024  for (;; ptr++)
1024          /* If the minimum is 1 and the previous item was a character string,          /* If the minimum is 1 and the previous item was a character string,
1025          we either have to put back the item that got cancelled if the string          we either have to put back the item that got cancelled if the string
1026          length was 1, or add the character back onto the end of a longer          length was 1, or add the character back onto the end of a longer
1027          string. For a character type nothing need be done; it will just get put          string. For a character type nothing need be done; it will just get
1028          back naturally. */          put back naturally. Note that the final character is always going to
1029            get added below. */
1030    
1031          else if (*previous == OP_CHARS)          else if (*previous == OP_CHARS)
1032            {            {
1033            if (code == previous) code += 2; else previous[1]++;            if (code == previous) code += 2; else previous[1]++;
1034            }            }
1035    
1036            /*  For a single negated character we also have to put back the
1037            item that got cancelled. */
1038    
1039            else if (*previous == OP_NOT) code++;
1040    
1041          /* If the maximum is unlimited, insert an OP_STAR. */          /* If the maximum is unlimited, insert an OP_STAR. */
1042    
1043          if (repeat_max < 0)          if (repeat_max < 0)
# Line 1035  for (;; ptr++) Line 1086  for (;; ptr++)
1086        }        }
1087    
1088      /* If previous was a bracket group, we may have to replicate it in certain      /* If previous was a bracket group, we may have to replicate it in certain
1089      cases. If the maximum repeat count is unlimited, check that the bracket      cases. */
     group cannot match the empty string, and diagnose an error if it can. */  
1090    
1091      else if ((int)*previous >= OP_BRA)      else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE ||
1092                 (int)*previous == OP_COND)
1093        {        {
1094        int i;        register int i;
1095          int ketoffset = 0;
1096        int len = code - previous;        int len = code - previous;
1097          uschar *bralink = NULL;
1098    
1099          /* If the maximum repeat count is unlimited, find the end of the bracket
1100          by scanning through from the start, and compute the offset back to it
1101          from the current code pointer. There may be an OP_OPT setting following
1102          the final KET, so we can't find the end just by going back from the code
1103          pointer. */
1104    
1105          if (repeat_max == -1)
1106            {
1107            register uschar *ket = previous;
1108            do ket += (ket[1] << 8) + ket[2]; while (*ket != OP_KET);
1109            ketoffset = code - ket;
1110            }
1111    
1112          /* The case of a zero minimum is special because of the need to stick
1113          OP_BRAZERO in front of it, and because the group appears once in the
1114          data, whereas in other cases it appears the minimum number of times. For
1115          this reason, it is simplest to treat this case separately, as otherwise
1116          the code gets far too messy. There are several special subcases when the
1117          minimum is zero. */
1118    
1119        if (repeat_max == -1 && could_be_empty(previous))        if (repeat_min == 0)
1120          {          {
1121          *errorptr = ERR10;          /* If the maximum is also zero, we just omit the group from the output
1122          goto FAILED;          altogether. */
1123    
1124            if (repeat_max == 0)
1125              {
1126              code = previous;
1127              previous = NULL;
1128              break;
1129              }
1130    
1131            /* If the maximum is 1 or unlimited, we just have to stick in the
1132            BRAZERO and do no more at this point. */
1133    
1134            if (repeat_max <= 1)
1135              {
1136              memmove(previous+1, previous, len);
1137              code++;
1138              *previous++ = OP_BRAZERO + repeat_type;
1139              }
1140    
1141            /* If the maximum is greater than 1 and limited, we have to replicate
1142            in a nested fashion, sticking OP_BRAZERO before each set of brackets.
1143            The first one has to be handled carefully because it's the original
1144            copy, which has to be moved up. The remainder can be handled by code
1145            that is common with the non-zero minimum case below. We just have to
1146            adjust the value of repeat_max, since one less copy is required. */
1147    
1148            else
1149              {
1150              int offset;
1151              memmove(previous+4, previous, len);
1152              code += 4;
1153              *previous++ = OP_BRAZERO + repeat_type;
1154              *previous++ = OP_BRA;
1155    
1156              /* We chain together the bracket offset fields that have to be
1157              filled in later when the ends of the brackets are reached. */
1158    
1159              offset = (bralink == NULL)? 0 : previous - bralink;
1160              bralink = previous;
1161              *previous++ = offset >> 8;
1162              *previous++ = offset & 255;
1163              }
1164    
1165            repeat_max--;
1166            }
1167    
1168          /* If the minimum is greater than zero, replicate the group as many
1169          times as necessary, and adjust the maximum to the number of subsequent
1170          copies that we need. */
1171    
1172          else
1173            {
1174            for (i = 1; i < repeat_min; i++)
1175              {
1176              memcpy(code, previous, len);
1177              code += len;
1178              }
1179            if (repeat_max > 0) repeat_max -= repeat_min;
1180          }          }
1181    
1182          /* This code is common to both the zero and non-zero minimum cases. If
1183          the maximum is limited, it replicates the group in a nested fashion,
1184          remembering the bracket starts on a stack. In the case of a zero minimum,
1185          the first one was set up above. In all cases the repeat_max now specifies
1186          the number of additional copies needed. */
1187    
1188          if (repeat_max >= 0)
1189            {
1190            for (i = repeat_max - 1; i >= 0; i--)
1191              {
1192              *code++ = OP_BRAZERO + repeat_type;
1193    
1194              /* All but the final copy start a new nesting, maintaining the
1195              chain of brackets outstanding. */
1196    
1197              if (i != 0)
1198                {
1199                int offset;
1200                *code++ = OP_BRA;
1201                offset = (bralink == NULL)? 0 : code - bralink;
1202                bralink = code;
1203                *code++ = offset >> 8;
1204                *code++ = offset & 255;
1205                }
1206    
1207              memcpy(code, previous, len);
1208              code += len;
1209              }
1210    
1211            /* Now chain through the pending brackets, and fill in their length
1212            fields (which are holding the chain links pro tem). */
1213    
1214            while (bralink != NULL)
1215              {
1216              int oldlinkoffset;
1217              int offset = code - bralink + 1;
1218              uschar *bra = code - offset;
1219              oldlinkoffset = (bra[1] << 8) + bra[2];
1220              bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
1221              *code++ = OP_KET;
1222              *code++ = bra[1] = offset >> 8;
1223              *code++ = bra[2] = (offset & 255);
1224              }
1225            }
1226    
1227          /* If the maximum is unlimited, set a repeater in the final copy. We
1228          can't just offset backwards from the current code point, because we
1229          don't know if there's been an options resetting after the ket. The
1230          correct offset was computed above. */
1231    
1232          else code[-ketoffset] = OP_KETRMAX + repeat_type;
1233    
1234    
1235    #ifdef NEVER
1236        /* If the minimum is greater than zero, and the maximum is unlimited or        /* If the minimum is greater than zero, and the maximum is unlimited or
1237        equal to the minimum, the first copy remains where it is, and is        equal to the minimum, the first copy remains where it is, and is
1238        replicated up to the minimum number of times. This case includes the +        replicated up to the minimum number of times. This case includes the +
# Line 1090  for (;; ptr++) Line 1274  for (;; ptr++)
1274            }            }
1275          }          }
1276    
1277        /* If the maximum is unlimited, set a repeater in the final copy. */        /* If the maximum is unlimited, set a repeater in the final copy. We
1278          can't just offset backwards from the current code point, because we
1279          don't know if there's been an options resetting after the ket. The
1280          correct offset was computed above. */
1281    
1282          if (repeat_max == -1) code[-ketoffset] = OP_KETRMAX + repeat_type;
1283    #endif
1284    
1285    
       if (repeat_max == -1) code[-3] = OP_KETRMAX + repeat_type;  
1286        }        }
1287    
1288      /* Else there's some kind of shambles */      /* Else there's some kind of shambles */
# Line 1109  for (;; ptr++) Line 1299  for (;; ptr++)
1299      break;      break;
1300    
1301    
1302      /* Start of nested bracket sub-expression, or comment or lookahead.      /* Start of nested bracket sub-expression, or comment or lookahead or
1303      First deal with special things that can come after a bracket; all are      lookbehind or option setting or condition. First deal with special things
1304      introduced by ?, and the appearance of any of them means that this is not a      that can come after a bracket; all are introduced by ?, and the appearance
1305      referencing group. They were checked for validity in the first pass over      of any of them means that this is not a referencing group. They were
1306      the string, so we don't have to check for syntax errors here.  */      checked for validity in the first pass over the string, so we don't have to
1307        check for syntax errors here.  */
1308    
1309      case '(':      case '(':
1310      previous = code;              /* Only real brackets can be repeated */      newoptions = options;
1311        condref = -1;
1312    
1313      if (*(++ptr) == '?')      if (*(++ptr) == '?')
1314        {        {
1315        bravalue = OP_BRA;        int set, unset;
1316          int *optset;
1317    
1318        switch (*(++ptr))        switch (*(++ptr))
1319          {          {
1320          case '#':          case '#':                 /* Comment; skip to ket */
         case 'i':  
         case 'm':  
         case 's':  
         case 'x':  
1321          ptr++;          ptr++;
1322          while (*ptr != ')') ptr++;          while (*ptr != ')') ptr++;
         previous = NULL;  
1323          continue;          continue;
1324    
1325          case ':':                 /* Non-extracting bracket */          case ':':                 /* Non-extracting bracket */
1326            bravalue = OP_BRA;
1327          ptr++;          ptr++;
1328          break;          break;
1329    
1330          case '=':                 /* Assertions can't be repeated */          case '(':
1331            bravalue = OP_COND;       /* Conditional group */
1332            if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0)
1333              {
1334              condref = *ptr - '0';
1335              while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';
1336              ptr++;
1337              }
1338            else ptr--;
1339            break;
1340    
1341            case '=':                 /* Positive lookahead */
1342          bravalue = OP_ASSERT;          bravalue = OP_ASSERT;
1343          ptr++;          ptr++;
         previous = NULL;  
1344          break;          break;
1345    
1346          case '!':          case '!':                 /* Negative lookahead */
1347          bravalue = OP_ASSERT_NOT;          bravalue = OP_ASSERT_NOT;
1348          ptr++;          ptr++;
         previous = NULL;  
1349          break;          break;
1350    
1351          case '>':                         /* "Match once" brackets */          case '<':                 /* Lookbehinds */
1352          if ((options & PCRE_EXTRA) != 0)  /* Not yet standard */          switch (*(++ptr))
1353            {            {
1354            bravalue = OP_ONCE;            case '=':               /* Positive lookbehind */
1355              bravalue = OP_ASSERTBACK;
1356              ptr++;
1357              break;
1358    
1359              case '!':               /* Negative lookbehind */
1360              bravalue = OP_ASSERTBACK_NOT;
1361            ptr++;            ptr++;
           previous = NULL;  
1362            break;            break;
1363    
1364              default:                /* Syntax error */
1365              *errorptr = ERR24;
1366              goto FAILED;
1367            }            }
1368          /* Else fall through */          break;
1369    
1370          default:          case '>':                 /* One-time brackets */
1371          *errorptr = ERR12;          bravalue = OP_ONCE;
1372          goto FAILED;          ptr++;
1373            break;
1374    
1375            default:                  /* Option setting */
1376            set = unset = 0;
1377            optset = &set;
1378    
1379            while (*ptr != ')' && *ptr != ':')
1380              {
1381              switch (*ptr++)
1382                {
1383                case '-': optset = &unset; break;
1384    
1385                case 'i': *optset |= PCRE_CASELESS; break;
1386                case 'm': *optset |= PCRE_MULTILINE; break;
1387                case 's': *optset |= PCRE_DOTALL; break;
1388                case 'x': *optset |= PCRE_EXTENDED; break;
1389                case 'U': *optset |= PCRE_UNGREEDY; break;
1390                case 'X': *optset |= PCRE_EXTRA; break;
1391    
1392                default:
1393                *errorptr = ERR12;
1394                goto FAILED;
1395                }
1396              }
1397    
1398            /* Set up the changed option bits, but don't change anything yet. */
1399    
1400            newoptions = (options | set) & (~unset);
1401    
1402            /* If the options ended with ')' this is not the start of a nested
1403            group with option changes, so the options change at this level. At top
1404            level there is nothing else to be done (the options will in fact have
1405            been set from the start of compiling as a result of the first pass) but
1406            at an inner level we must compile code to change the ims options if
1407            necessary, and pass the new setting back so that it can be put at the
1408            start of any following branches, and when this group ends, a resetting
1409            item can be compiled. */
1410    
1411            if (*ptr == ')')
1412              {
1413              if ((options & PCRE_INGROUP) != 0 &&
1414                  (options & PCRE_IMS) != (newoptions & PCRE_IMS))
1415                {
1416                *code++ = OP_OPT;
1417                *code++ = *optchanged = newoptions & PCRE_IMS;
1418                }
1419              options = newoptions;  /* Change options at this level */
1420              previous = NULL;       /* This item can't be repeated */
1421              continue;              /* It is complete */
1422              }
1423    
1424            /* If the options ended with ':' we are heading into a nested group
1425            with possible change of options. Such groups are non-capturing and are
1426            not assertions of any kind. All we need to do is skip over the ':';
1427            the newoptions value is handled below. */
1428    
1429            bravalue = OP_BRA;
1430            ptr++;
1431          }          }
1432        }        }
1433    
1434      /* Else we have a referencing group */      /* Else we have a referencing group; adjust the opcode. */
1435    
1436      else      else
1437        {        {
# Line 1177  for (;; ptr++) Line 1443  for (;; ptr++)
1443        bravalue = OP_BRA + *brackets;        bravalue = OP_BRA + *brackets;
1444        }        }
1445    
1446      /* Process nested bracketed re; at end pointer is on the bracket. We copy      /* Process nested bracketed re. Assertions may not be repeated, but other
1447      code into a non-register variable in order to be able to pass its address      kinds can be. We copy code into a non-register variable in order to be able
1448      because some compilers complain otherwise. */      to pass its address because some compilers complain otherwise. Pass in a
1449        new setting for the ims options if they have changed. */
1450    
1451        previous = (bravalue >= OP_ONCE)? code : NULL;
1452      *code = bravalue;      *code = bravalue;
1453        tempcode = code;
1454    
1455        if (!compile_regex(
1456             options | PCRE_INGROUP,       /* Set for all nested groups */
1457             ((options & PCRE_IMS) != (newoptions & PCRE_IMS))?
1458               newoptions & PCRE_IMS : -1, /* Pass ims options if changed */
1459             brackets,                     /* Bracket level */
1460             &tempcode,                    /* Where to put code (updated) */
1461             &ptr,                         /* Input pointer (updated) */
1462             errorptr,                     /* Where to put an error message */
1463             (bravalue == OP_ASSERTBACK ||
1464              bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
1465             condref,                      /* Condition reference number */
1466             cd))                          /* Tables block */
1467          goto FAILED;
1468    
1469        /* At the end of compiling, code is still pointing to the start of the
1470        group, while tempcode has been updated to point past the end of the group
1471        and any option resetting that may follow it. The pattern pointer (ptr)
1472        is on the bracket. */
1473    
1474        /* If this is a conditional bracket, check that there are no more than
1475        two branches in the group. */
1476    
1477        if (bravalue == OP_COND)
1478        {        {
1479        uschar *mcode = code;        int branchcount = 0;
1480        if (!compile_regex(options, brackets, &mcode, &ptr, errorptr))        uschar *tc = code;
1481    
1482          do {
1483             branchcount++;
1484             tc += (tc[1] << 8) | tc[2];
1485             }
1486          while (*tc != OP_KET);
1487    
1488          if (branchcount > 2)
1489            {
1490            *errorptr = ERR27;
1491          goto FAILED;          goto FAILED;
1492        code = mcode;          }
1493        }        }
1494    
1495        /* Now update the main code pointer to the end of the group. */
1496    
1497        code = tempcode;
1498    
1499        /* Error if hit end of pattern */
1500    
1501      if (*ptr != ')')      if (*ptr != ')')
1502        {        {
1503        *errorptr = ERR14;        *errorptr = ERR14;
# Line 1201  for (;; ptr++) Line 1510  for (;; ptr++)
1510      for validity in the pre-compiling pass. */      for validity in the pre-compiling pass. */
1511    
1512      case '\\':      case '\\':
1513      oldptr = ptr;      tempptr = ptr;
1514      c = check_escape(&ptr, errorptr, *brackets, options, FALSE);      c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1515    
1516      /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values      /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values
1517      are arranged to be the negation of the corresponding OP_values. For the      are arranged to be the negation of the corresponding OP_values. For the
# Line 1215  for (;; ptr++) Line 1524  for (;; ptr++)
1524        {        {
1525        if (-c >= ESC_REF)        if (-c >= ESC_REF)
1526          {          {
         int refnum = -c - ESC_REF;  
         if (*brackets < refnum)  
           {  
           *errorptr = ERR15;  
           goto FAILED;  
           }  
1527          previous = code;          previous = code;
1528          *code++ = OP_REF;          *code++ = OP_REF;
1529          *code++ = refnum;          *code++ = -c - ESC_REF;
1530          }          }
1531        else        else
1532          {          {
1533          previous = (-c > ESC_b && -c < ESC_X)? code : NULL;          previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
1534          *code++ = -c;          *code++ = -c;
1535          }          }
1536        continue;        continue;
# Line 1235  for (;; ptr++) Line 1538  for (;; ptr++)
1538    
1539      /* Data character: reset and fall through */      /* Data character: reset and fall through */
1540    
1541      ptr = oldptr;      ptr = tempptr;
1542      c = '\\';      c = '\\';
1543    
1544      /* Handle a run of data characters until a metacharacter is encountered.      /* Handle a run of data characters until a metacharacter is encountered.
# Line 1253  for (;; ptr++) Line 1556  for (;; ptr++)
1556        {        {
1557        if ((options & PCRE_EXTENDED) != 0)        if ((options & PCRE_EXTENDED) != 0)
1558          {          {
1559          if ((pcre_ctypes[c] & ctype_space) != 0) continue;          if ((cd->ctypes[c] & ctype_space) != 0) continue;
1560          if (c == '#')          if (c == '#')
1561            {            {
1562            while ((c = *(++ptr)) != 0 && c != '\n');            while ((c = *(++ptr)) != 0 && c != '\n');
# Line 1268  for (;; ptr++) Line 1571  for (;; ptr++)
1571    
1572        if (c == '\\')        if (c == '\\')
1573          {          {
1574          oldptr = ptr;          tempptr = ptr;
1575          c = check_escape(&ptr, errorptr, *brackets, options, FALSE);          c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1576          if (c < 0) { ptr = oldptr; break; }          if (c < 0) { ptr = tempptr; break; }
1577          }          }
1578    
1579        /* Ordinary character or single-char escape */        /* Ordinary character or single-char escape */
# Line 1281  for (;; ptr++) Line 1584  for (;; ptr++)
1584    
1585      /* This "while" is the end of the "do" above. */      /* This "while" is the end of the "do" above. */
1586    
1587      while (length < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);      while (length < 255 && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0);
1588    
1589      /* Compute the length and set it in the data vector, and advance to      /* Compute the length and set it in the data vector, and advance to
1590      the next state. */      the next state. */
1591    
1592      previous[1] = length;      previous[1] = length;
1593      ptr--;      if (length < 255) ptr--;
1594      break;      break;
1595      }      }
1596    }                   /* end of big loop */    }                   /* end of big loop */
# Line 1311  return FALSE; Line 1614  return FALSE;
1614  /* On entry, ptr is pointing past the bracket character, but on return  /* On entry, ptr is pointing past the bracket character, but on return
1615  it points to the closing bracket, or vertical bar, or end of string.  it points to the closing bracket, or vertical bar, or end of string.
1616  The code variable is pointing at the byte into which the BRA operator has been  The code variable is pointing at the byte into which the BRA operator has been
1617  stored.  stored. If the ims options are changed at the start (for a (?ims: group) or
1618    during any branch, we need to insert an OP_OPT item at the start of every
1619    following branch to ensure they get set correctly at run time, and also pass
1620    the new options into every subsequent branch compile.
1621    
1622  Argument:  Argument:
1623    options   the option bits    options     the option bits
1624    brackets  -> int containing the number of extracting brackets used    optchanged  new ims options to set as if (?ims) were at the start, or -1
1625    codeptr   -> the address of the current code pointer                 for no change
1626    ptrptr    -> the address of the current pattern pointer    brackets    -> int containing the number of extracting brackets used
1627    errorptr  -> pointer to error message    codeptr     -> the address of the current code pointer
1628      ptrptr      -> the address of the current pattern pointer
1629      errorptr    -> pointer to error message
1630      lookbehind  TRUE if this is a lookbehind assertion
1631      condref     > 0 for OP_CREF setting at start of conditional group
1632      cd          points to the data block with tables pointers
1633    
1634  Returns:    TRUE on success  Returns:      TRUE on success
1635  */  */
1636    
1637  static BOOL  static BOOL
1638  compile_regex(int options, int *brackets, uschar **codeptr,  compile_regex(int options, int optchanged, int *brackets, uschar **codeptr,
1639    const uschar **ptrptr, const char **errorptr)    const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref,
1640      compile_data *cd)
1641  {  {
1642  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
1643  uschar *code = *codeptr;  uschar *code = *codeptr;
1644    uschar *last_branch = code;
1645  uschar *start_bracket = code;  uschar *start_bracket = code;
1646    uschar *reverse_count = NULL;
1647    int oldoptions = options & PCRE_IMS;
1648    
1649    code += 3;
1650    
1651    /* At the start of a reference-based conditional group, insert the reference
1652    number as an OP_CREF item. */
1653    
1654    if (condref > 0)
1655      {
1656      *code++ = OP_CREF;
1657      *code++ = condref;
1658      }
1659    
1660    /* Loop for each alternative branch */
1661    
1662  for (;;)  for (;;)
1663    {    {
1664    int length;    int length;
   uschar *last_branch = code;  
1665    
1666    code += 3;    /* Handle change of options */
1667    if (!compile_branch(options, brackets, &code, &ptr, errorptr))  
1668      if (optchanged >= 0)
1669        {
1670        *code++ = OP_OPT;
1671        *code++ = optchanged;
1672        options = (options & ~PCRE_IMS) | optchanged;
1673        }
1674    
1675      /* Set up dummy OP_REVERSE if lookbehind assertion */
1676    
1677      if (lookbehind)
1678        {
1679        *code++ = OP_REVERSE;
1680        reverse_count = code;
1681        *code++ = 0;
1682        *code++ = 0;
1683        }
1684    
1685      /* Now compile the branch */
1686    
1687      if (!compile_branch(options,brackets,&code,&ptr,errorptr,&optchanged,cd))
1688      {      {
1689      *ptrptr = ptr;      *ptrptr = ptr;
1690      return FALSE;      return FALSE;
# Line 1349  for (;;) Line 1696  for (;;)
1696    last_branch[1] = length >> 8;    last_branch[1] = length >> 8;
1697    last_branch[2] = length & 255;    last_branch[2] = length & 255;
1698    
1699      /* If lookbehind, check that this branch matches a fixed-length string,
1700      and put the length into the OP_REVERSE item. Temporarily mark the end of
1701      the branch with OP_END. */
1702    
1703      if (lookbehind)
1704        {
1705        *code = OP_END;
1706        length = find_fixedlength(last_branch);
1707        DPRINTF(("fixed length = %d\n", length));
1708        if (length < 0)
1709          {
1710          *errorptr = ERR25;
1711          *ptrptr = ptr;
1712          return FALSE;
1713          }
1714        reverse_count[0] = (length >> 8);
1715        reverse_count[1] = length & 255;
1716        }
1717    
1718    /* Reached end of expression, either ')' or end of pattern. Insert a    /* Reached end of expression, either ')' or end of pattern. Insert a
1719    terminating ket and the length of the whole bracketed item, and return,    terminating ket and the length of the whole bracketed item, and return,
1720    leaving the pointer at the terminating char. */    leaving the pointer at the terminating char. If any of the ims options
1721      were changed inside the group, compile a resetting op-code following. */
1722    
1723    if (*ptr != '|')    if (*ptr != '|')
1724      {      {
# Line 1359  for (;;) Line 1726  for (;;)
1726      *code++ = OP_KET;      *code++ = OP_KET;
1727      *code++ = length >> 8;      *code++ = length >> 8;
1728      *code++ = length & 255;      *code++ = length & 255;
1729        if (optchanged >= 0)
1730          {
1731          *code++ = OP_OPT;
1732          *code++ = oldoptions;
1733          }
1734      *codeptr = code;      *codeptr = code;
1735      *ptrptr = ptr;      *ptrptr = ptr;
1736      return TRUE;      return TRUE;
# Line 1367  for (;;) Line 1739  for (;;)
1739    /* Another branch follows; insert an "or" node and advance the pointer. */    /* Another branch follows; insert an "or" node and advance the pointer. */
1740    
1741    *code = OP_ALT;    *code = OP_ALT;
1742      last_branch = code;
1743      code += 3;
1744    ptr++;    ptr++;
1745    }    }
1746  /* Control never reaches here */  /* Control never reaches here */
# Line 1374  for (;;) Line 1748  for (;;)
1748    
1749    
1750    
1751    
1752    /*************************************************
1753    *      Find first significant op code            *
1754    *************************************************/
1755    
1756    /* This is called by several functions that scan a compiled expression looking
1757    for a fixed first character, or an anchoring op code etc. It skips over things
1758    that do not influence this. For one application, a change of caseless option is
1759    important.
1760    
1761    Arguments:
1762      code       pointer to the start of the group
1763      options    pointer to external options
1764      optbit     the option bit whose changing is significant, or
1765                 zero if none are
1766      optstop    TRUE to return on option change, otherwise change the options
1767                   value and continue
1768    
1769    Returns:     pointer to the first significant opcode
1770    */
1771    
1772    static const uschar*
1773    first_significant_code(const uschar *code, int *options, int optbit,
1774      BOOL optstop)
1775    {
1776    for (;;)
1777      {
1778      switch ((int)*code)
1779        {
1780        case OP_OPT:
1781        if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit))
1782          {
1783          if (optstop) return code;
1784          *options = (int)code[1];
1785          }
1786        code += 2;
1787        break;
1788    
1789        case OP_CREF:
1790        code += 2;
1791        break;
1792    
1793        case OP_ASSERT_NOT:
1794        case OP_ASSERTBACK:
1795        case OP_ASSERTBACK_NOT:
1796        do code += (code[1] << 8) + code[2]; while (*code == OP_ALT);
1797        code += 3;
1798        break;
1799    
1800        default:
1801        return code;
1802        }
1803      }
1804    /* Control never reaches here */
1805    }
1806    
1807    
1808    
1809    
1810  /*************************************************  /*************************************************
1811  *          Check for anchored expression         *  *          Check for anchored expression         *
1812  *************************************************/  *************************************************/
# Line 1384  all of whose alternatives start with OP_ Line 1817  all of whose alternatives start with OP_
1817  it's anchored. However, if this is a multiline pattern, then only OP_SOD  it's anchored. However, if this is a multiline pattern, then only OP_SOD
1818  counts, since OP_CIRC can match in the middle.  counts, since OP_CIRC can match in the middle.
1819    
1820  A branch is also implicitly anchored if it starts with .* because that will try  A branch is also implicitly anchored if it starts with .* and DOTALL is set,
1821  the rest of the pattern at all possible matching points, so there is no point  because that will try the rest of the pattern at all possible matching points,
1822  trying them again.  so there is no point trying them again.
1823    
1824  Argument:  points to start of expression (the bracket)  Arguments:
1825  Returns:   TRUE or FALSE    code       points to start of expression (the bracket)
1826      options    points to the options setting
1827    
1828    Returns:     TRUE or FALSE
1829  */  */
1830    
1831  static BOOL  static BOOL
1832  is_anchored(register const uschar *code, BOOL multiline)  is_anchored(register const uschar *code, int *options)
1833  {  {
1834  do {  do {
1835     int op = (int)code[3];     const uschar *scode = first_significant_code(code + 3, options,
1836     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE)       PCRE_MULTILINE, FALSE);
1837       { if (!is_anchored(code+3, multiline)) return FALSE; }     register int op = *scode;
1838     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
1839       { if (code[4] != OP_ANY) return FALSE; }       { if (!is_anchored(scode, options)) return FALSE; }
1840     else if (op != OP_SOD && (multiline || op != OP_CIRC)) return FALSE;     else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) &&
1841                (*options & PCRE_DOTALL) != 0)
1842         { if (scode[1] != OP_ANY) return FALSE; }
1843       else if (op != OP_SOD &&
1844               ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))
1845         return FALSE;
1846     code += (code[1] << 8) + code[2];     code += (code[1] << 8) + code[2];
1847     }     }
1848  while (*code == OP_ALT);  while (*code == OP_ALT);
# Line 1411  return TRUE; Line 1852  return TRUE;
1852    
1853    
1854  /*************************************************  /*************************************************
1855  *     Check for start with \n line expression    *  *         Check for starting with ^ or .*        *
1856  *************************************************/  *************************************************/
1857    
1858  /* This is called for multiline expressions to try to find out if every branch  /* This is called to find out if every branch starts with ^ or .* so that
1859  starts with ^ so that "first char" processing can be done to speed things up.  "first char" processing can be done to speed things up in multiline
1860    matching and for non-DOTALL patterns that start with .* (which must start at
1861    the beginning or after \n).
1862    
1863  Argument:  points to start of expression (the bracket)  Argument:  points to start of expression (the bracket)
1864  Returns:   TRUE or FALSE  Returns:   TRUE or FALSE
# Line 1425  static BOOL Line 1868  static BOOL
1868  is_startline(const uschar *code)  is_startline(const uschar *code)
1869  {  {
1870  do {  do {
1871     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)     const uschar *scode = first_significant_code(code + 3, NULL, 0, FALSE);
1872       { if (!is_startline(code+3)) return FALSE; }     register int op = *scode;
1873     else if (code[3] != OP_CIRC) return FALSE;     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
1874         { if (!is_startline(scode)) return FALSE; }
1875       else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)
1876         { if (scode[1] != OP_ANY) return FALSE; }
1877       else if (op != OP_CIRC) return FALSE;
1878     code += (code[1] << 8) + code[2];     code += (code[1] << 8) + code[2];
1879     }     }
1880  while (*code == OP_ALT);  while (*code == OP_ALT);
# Line 1446  Consider each alternative branch. If the Line 1893  Consider each alternative branch. If the
1893  a bracket all of whose alternatives start with the same char (recurse ad lib),  a bracket all of whose alternatives start with the same char (recurse ad lib),
1894  then we return that char, otherwise -1.  then we return that char, otherwise -1.
1895    
1896  Argument:  points to start of expression (the bracket)  Arguments:
1897  Returns:   -1 or the fixed first char    code       points to start of expression (the bracket)
1898      options    pointer to the options (used to check casing changes)
1899    
1900    Returns:     -1 or the fixed first char
1901  */  */
1902    
1903  static int  static int
1904  find_firstchar(uschar *code)  find_firstchar(const uschar *code, int *options)
1905  {  {
1906  register int c = -1;  register int c = -1;
1907  do  do {
1908    {     int d;
1909    register int charoffset = 4;     const uschar *scode = first_significant_code(code + 3, options,
1910         PCRE_CASELESS, TRUE);
1911    if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)     register int op = *scode;
1912      {  
1913      register int d;     if (op >= OP_BRA) op = OP_BRA;
1914      if ((d = find_firstchar(code+3)) < 0) return -1;  
1915      if (c < 0) c = d; else if (c != d) return -1;     switch(op)
1916      }       {
1917         default:
1918    else switch(code[3])       return -1;
1919      {  
1920      default:       case OP_BRA:
1921      return -1;       case OP_ASSERT:
1922         case OP_ONCE:
1923      case OP_EXACT:       /* Fall through */       case OP_COND:
1924      charoffset++;       if ((d = find_firstchar(scode, options)) < 0) return -1;
1925         if (c < 0) c = d; else if (c != d) return -1;
1926      case OP_CHARS:       /* Fall through */       break;
1927      charoffset++;  
1928         case OP_EXACT:       /* Fall through */
1929         scode++;
1930    
1931         case OP_CHARS:       /* Fall through */
1932         scode++;
1933    
1934         case OP_PLUS:
1935         case OP_MINPLUS:
1936         if (c < 0) c = scode[1]; else if (c != scode[1]) return -1;
1937         break;
1938         }
1939    
1940      case OP_PLUS:     code += (code[1] << 8) + code[2];
1941      case OP_MINPLUS:     }
     if (c < 0) c = code[charoffset]; else if (c != code[charoffset]) return -1;  
     break;  
     }  
   code += (code[1] << 8) + code[2];  
   }  
1942  while (*code == OP_ALT);  while (*code == OP_ALT);
1943  return c;  return c;
1944  }  }
1945    
1946    
1947    
1948    
1949    
1950  /*************************************************  /*************************************************
1951  *        Compile a Regular Expression            *  *        Compile a Regular Expression            *
1952  *************************************************/  *************************************************/
# Line 1501  Arguments: Line 1959  Arguments:
1959    options      various option bits    options      various option bits
1960    errorptr     pointer to pointer to error text    errorptr     pointer to pointer to error text
1961    erroroffset  ptr offset in pattern where error was detected    erroroffset  ptr offset in pattern where error was detected
1962      tables       pointer to character tables or NULL
1963    
1964  Returns:       pointer to compiled data block, or NULL on error,  Returns:       pointer to compiled data block, or NULL on error,
1965                 with errorptr and erroroffset set                 with errorptr and erroroffset set
# Line 1508  Returns:       pointer to compiled data Line 1967  Returns:       pointer to compiled data
1967    
1968  pcre *  pcre *
1969  pcre_compile(const char *pattern, int options, const char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
1970    int *erroroffset)    int *erroroffset, const unsigned char *tables)
1971  {  {
1972  real_pcre *re;  real_pcre *re;
 int spaces = 0;  
1973  int length = 3;      /* For initial BRA plus length */  int length = 3;      /* For initial BRA plus length */
1974  int runlength;  int runlength;
1975  int c, size;  int c, size;
1976  int bracount = 0;  int bracount = 0;
 int brastack[200];  
1977  int top_backref = 0;  int top_backref = 0;
1978    int branch_extra = 0;
1979    int branch_newextra;
1980  unsigned int brastackptr = 0;  unsigned int brastackptr = 0;
1981  uschar *code;  uschar *code;
1982  const uschar *ptr;  const uschar *ptr;
1983    compile_data compile_block;
1984    int brastack[BRASTACK_SIZE];
1985    uschar bralenstack[BRASTACK_SIZE];
1986    
1987  #ifdef DEBUG  #ifdef DEBUG
1988  uschar *code_base, *code_end;  uschar *code_base, *code_end;
# Line 1547  if ((options & ~PUBLIC_OPTIONS) != 0) Line 2009  if ((options & ~PUBLIC_OPTIONS) != 0)
2009    return NULL;    return NULL;
2010    }    }
2011    
2012    /* Set up pointers to the individual character tables */
2013    
2014    if (tables == NULL) tables = pcre_default_tables;
2015    compile_block.lcc = tables + lcc_offset;
2016    compile_block.fcc = tables + fcc_offset;
2017    compile_block.cbits = tables + cbits_offset;
2018    compile_block.ctypes = tables + ctypes_offset;
2019    
2020    /* Reflect pattern for debugging output */
2021    
2022  DPRINTF(("------------------------------------------------------------------\n"));  DPRINTF(("------------------------------------------------------------------\n"));
2023  DPRINTF(("%s\n", pattern));  DPRINTF(("%s\n", pattern));
2024    
# Line 1563  while ((c = *(++ptr)) != 0) Line 2035  while ((c = *(++ptr)) != 0)
2035    int min, max;    int min, max;
2036    int class_charcount;    int class_charcount;
2037    
2038    if ((pcre_ctypes[c] & ctype_space) != 0)    if ((options & PCRE_EXTENDED) != 0)
     {  
     if ((options & PCRE_EXTENDED) != 0) continue;  
     spaces++;  
     }  
   
   if (c == '#' && (options & PCRE_EXTENDED) != 0)  
2039      {      {
2040      while ((c = *(++ptr)) != 0 && c != '\n');      if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2041      continue;      if (c == '#')
2042          {
2043          while ((c = *(++ptr)) != 0 && c != '\n');
2044          continue;
2045          }
2046      }      }
2047    
2048    switch(c)    switch(c)
# Line 1585  while ((c = *(++ptr)) != 0) Line 2055  while ((c = *(++ptr)) != 0)
2055      case '\\':      case '\\':
2056        {        {
2057        const uschar *save_ptr = ptr;        const uschar *save_ptr = ptr;
2058        c = check_escape(&ptr, errorptr, bracount, options, FALSE);        c = check_escape(&ptr, errorptr, bracount, options, FALSE, &compile_block);
2059        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2060        if (c >= 0)        if (c >= 0)
2061          {          {
# Line 1605  while ((c = *(++ptr)) != 0) Line 2075  while ((c = *(++ptr)) != 0)
2075        int refnum = -c - ESC_REF;        int refnum = -c - ESC_REF;
2076        if (refnum > top_backref) top_backref = refnum;        if (refnum > top_backref) top_backref = refnum;
2077        length++;   /* For single back reference */        length++;   /* For single back reference */
2078        if (ptr[1] == '{' && is_counted_repeat(ptr+2))        if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2079          {          {
2080          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
2081          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2082          if ((min == 0 && (max == 1 || max == -1)) ||          if ((min == 0 && (max == 1 || max == -1)) ||
2083            (min == 1 && max == -1))            (min == 1 && max == -1))
# Line 1631  while ((c = *(++ptr)) != 0) Line 2101  while ((c = *(++ptr)) != 0)
2101      or back reference. */      or back reference. */
2102    
2103      case '{':      case '{':
2104      if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;      if (!is_counted_repeat(ptr+1, &compile_block)) goto NORMAL_CHAR;
2105      ptr = read_repeat_counts(ptr+1, &min, &max, errorptr);      ptr = read_repeat_counts(ptr+1, &min, &max, errorptr, &compile_block);
2106      if (*errorptr != NULL) goto PCRE_ERROR_RETURN;      if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2107      if ((min == 0 && (max == 1 || max == -1)) ||      if ((min == 0 && (max == 1 || max == -1)) ||
2108        (min == 1 && max == -1))        (min == 1 && max == -1))
# Line 1646  while ((c = *(++ptr)) != 0) Line 2116  while ((c = *(++ptr)) != 0)
2116      if (ptr[1] == '?') ptr++;      if (ptr[1] == '?') ptr++;
2117      continue;      continue;
2118    
2119      /* An alternation contains an offset to the next branch or ket. */      /* An alternation contains an offset to the next branch or ket. If any ims
2120        options changed in the previous branch(es), and/or if we are in a
2121        lookbehind assertion, extra space will be needed at the start of the
2122        branch. This is handled by branch_extra. */
2123    
2124      case '|':      case '|':
2125      length += 3;      length += 3 + branch_extra;
2126      continue;      continue;
2127    
2128      /* A character class uses 33 characters. Don't worry about character types      /* A character class uses 33 characters. Don't worry about character types
# Line 1663  while ((c = *(++ptr)) != 0) Line 2137  while ((c = *(++ptr)) != 0)
2137        {        {
2138        if (*ptr == '\\')        if (*ptr == '\\')
2139          {          {
2140          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE);          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE,
2141              &compile_block);
2142          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2143          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
2144          }          }
# Line 1680  while ((c = *(++ptr)) != 0) Line 2155  while ((c = *(++ptr)) != 0)
2155    
2156        /* A repeat needs either 1 or 5 bytes. */        /* A repeat needs either 1 or 5 bytes. */
2157    
2158        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2159          {          {
2160          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
2161          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2162          if ((min == 0 && (max == 1 || max == -1)) ||          if ((min == 0 && (max == 1 || max == -1)) ||
2163            (min == 1 && max == -1))            (min == 1 && max == -1))
# Line 1695  while ((c = *(++ptr)) != 0) Line 2170  while ((c = *(++ptr)) != 0)
2170    
2171      /* Brackets may be genuine groups or special things */      /* Brackets may be genuine groups or special things */
2172    
2173      case '(':      case '(':
2174        branch_newextra = 0;
2175    
2176        /* Handle special forms of bracket, which all start (? */
2177    
2178        if (ptr[1] == '?')
2179          {
2180          int set, unset;
2181          int *optset;
2182    
2183          switch (c = ptr[2])
2184            {
2185            /* Skip over comments entirely */
2186            case '#':
2187            ptr += 3;
2188            while (*ptr != 0 && *ptr != ')') ptr++;
2189            if (*ptr == 0)
2190              {
2191              *errorptr = ERR18;
2192              goto PCRE_ERROR_RETURN;
2193              }
2194            continue;
2195    
2196            /* Non-referencing groups and lookaheads just move the pointer on, and
2197            then behave like a non-special bracket, except that they don't increment
2198            the count of extracting brackets. Ditto for the "once only" bracket,
2199            which is in Perl from version 5.005. */
2200    
2201            case ':':
2202            case '=':
2203            case '!':
2204            case '>':
2205            ptr += 2;
2206            break;
2207    
2208            /* Lookbehinds are in Perl from version 5.005 */
2209    
2210            case '<':
2211            if (ptr[3] == '=' || ptr[3] == '!')
2212              {
2213              ptr += 3;
2214              branch_newextra = 3;
2215              length += 3;         /* For the first branch */
2216              break;
2217              }
2218            *errorptr = ERR24;
2219            goto PCRE_ERROR_RETURN;
2220    
2221            /* Conditionals are in Perl from version 5.005. The bracket must either
2222            be followed by a number (for bracket reference) or by an assertion
2223            group. */
2224    
2225            case '(':
2226            if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0)
2227              {
2228              ptr += 4;
2229              length += 2;
2230              while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++;
2231              if (*ptr != ')')
2232                {
2233                *errorptr = ERR26;
2234                goto PCRE_ERROR_RETURN;
2235                }
2236              }
2237            else   /* An assertion must follow */
2238              {
2239              ptr++;   /* Can treat like ':' as far as spacing is concerned */
2240    
2241              if (ptr[2] != '?' || strchr("=!<", ptr[3]) == NULL)
2242                {
2243                ptr += 2;    /* To get right offset in message */
2244                *errorptr = ERR28;
2245                goto PCRE_ERROR_RETURN;
2246                }
2247              }
2248            break;
2249    
2250            /* Else loop checking valid options until ) is met. Anything else is an
2251            error. If we are without any brackets, i.e. at top level, the settings
2252            act as if specified in the options, so massage the options immediately.
2253            This is for backward compatibility with Perl 5.004. */
2254    
2255            default:
2256            set = unset = 0;
2257            optset = &set;
2258            ptr += 2;
2259    
2260            for (;; ptr++)
2261              {
2262              c = *ptr;
2263              switch (c)
2264                {
2265                case 'i':
2266                *optset |= PCRE_CASELESS;
2267                continue;
2268    
2269                case 'm':
2270                *optset |= PCRE_MULTILINE;
2271                continue;
2272    
2273                case 's':
2274                *optset |= PCRE_DOTALL;
2275                continue;
2276    
2277                case 'x':
2278                *optset |= PCRE_EXTENDED;
2279                continue;
2280    
2281                case 'X':
2282                *optset |= PCRE_EXTRA;
2283                continue;
2284    
2285                case 'U':
2286                *optset |= PCRE_UNGREEDY;
2287                continue;
2288    
2289      /* Handle special forms of bracket, which all start (? */              case '-':
2290                optset = &unset;
2291                continue;
2292    
2293      if (ptr[1] == '?') switch (c = ptr[2])              /* A termination by ')' indicates an options-setting-only item;
2294        {              this is global at top level; otherwise nothing is done here and
2295        /* Skip over comments entirely */              it is handled during the compiling process on a per-bracket-group
2296        case '#':              basis. */
       ptr += 3;  
       while (*ptr != 0 && *ptr != ')') ptr++;  
       if (*ptr == 0)  
         {  
         *errorptr = ERR18;  
         goto PCRE_ERROR_RETURN;  
         }  
       continue;  
2297    
2298        /* Non-referencing groups and lookaheads just move the pointer on, and              case ')':
2299        then behave like a non-special bracket, except that they don't increment              if (brastackptr == 0)
2300        the count of extracting brackets. */                {
2301                  options = (options | set) & (~unset);
2302        case ':':                set = unset = 0;     /* To save length */
2303        case '=':                }
2304        case '!':              /* Fall through */
       ptr += 2;  
       break;  
2305    
2306        /* Ditto for the "once only" bracket, allowed only if the extra bit              /* A termination by ':' indicates the start of a nested group with
2307        is set. */              the given options set. This is again handled at compile time, but
2308                we must allow for compiled space if any of the ims options are
2309                set. We also have to allow for resetting space at the end of
2310                the group, which is why 4 is added to the length and not just 2.
2311                If there are several changes of options within the same group, this
2312                will lead to an over-estimate on the length, but this shouldn't
2313                matter very much. We also have to allow for resetting options at
2314                the start of any alternations, which we do by setting
2315                branch_newextra to 2. */
2316    
2317        case '>':              case ':':
2318        if ((options & PCRE_EXTRA) != 0)              if (((set|unset) & PCRE_IMS) != 0)
2319          {                {
2320          ptr += 2;                length += 4;
2321          break;                branch_newextra = 2;
2322          }                }
2323          }          /* Else fall through */              goto END_OPTIONS;
2324    
2325        /* Else loop setting valid options until ) is met. Anything else is an              /* Unrecognized option character */
       error. */  
2326    
2327        default:              default:
2328        ptr += 2;              *errorptr = ERR12;
2329        for (;; ptr++)              goto PCRE_ERROR_RETURN;
2330          {              }
         if ((c = *ptr) == 'i')  
           {  
           options |= PCRE_CASELESS;  
           continue;  
           }  
         else if ((c = *ptr) == 'm')  
           {  
           options |= PCRE_MULTILINE;  
           continue;  
           }  
         else if (c == 's')  
           {  
           options |= PCRE_DOTALL;  
           continue;  
2331            }            }
2332          else if (c == 'x')  
2333            /* If we hit a closing bracket, that's it - this is a freestanding
2334            option-setting. We need to ensure that branch_extra is updated if
2335            necessary. The only values branch_newextra can have here are 0 or 2.
2336            If the value is 2, then branch_extra must either be 2 or 5, depending
2337            on whether this is a lookbehind group or not. */
2338    
2339            END_OPTIONS:
2340            if (c == ')')
2341            {            {
2342            options |= PCRE_EXTENDED;            if (branch_newextra == 2 && (branch_extra == 0 || branch_extra == 3))
2343            length -= spaces;          /* Already counted spaces */              branch_extra += branch_newextra;
2344            continue;            continue;
2345            }            }
         else if (c == ')') break;  
2346    
2347          *errorptr = ERR12;          /* If options were terminated by ':' control comes here. Fall through
2348          goto PCRE_ERROR_RETURN;          to handle the group below. */
2349          }          }
       continue;                      /* End of this bracket handling */  
2350        }        }
2351    
2352      /* Extracting brackets must be counted so we can process escapes in a      /* Extracting brackets must be counted so we can process escapes in a
# Line 1775  while ((c = *(++ptr)) != 0) Line 2355  while ((c = *(++ptr)) != 0)
2355      else bracount++;      else bracount++;
2356    
2357      /* Non-special forms of bracket. Save length for computing whole length      /* Non-special forms of bracket. Save length for computing whole length
2358      at end if there's a repeat that requires duplication of the group. */      at end if there's a repeat that requires duplication of the group. Also
2359        save the current value of branch_extra, and start the new group with
2360        the new value. If non-zero, this will either be 2 for a (?imsx: group, or 3
2361        for a lookbehind assertion. */
2362    
2363      if (brastackptr >= sizeof(brastack)/sizeof(int))      if (brastackptr >= sizeof(brastack)/sizeof(int))
2364        {        {
# Line 1783  while ((c = *(++ptr)) != 0) Line 2366  while ((c = *(++ptr)) != 0)
2366        goto PCRE_ERROR_RETURN;        goto PCRE_ERROR_RETURN;
2367        }        }
2368    
2369        bralenstack[brastackptr] = branch_extra;
2370        branch_extra = branch_newextra;
2371    
2372      brastack[brastackptr++] = length;      brastack[brastackptr++] = length;
2373      length += 3;      length += 3;
2374      continue;      continue;
# Line 1790  while ((c = *(++ptr)) != 0) Line 2376  while ((c = *(++ptr)) != 0)
2376      /* Handle ket. Look for subsequent max/min; for certain sets of values we      /* Handle ket. Look for subsequent max/min; for certain sets of values we
2377      have to replicate this bracket up to that many times. If brastackptr is      have to replicate this bracket up to that many times. If brastackptr is
2378      0 this is an unmatched bracket which will generate an error, but take care      0 this is an unmatched bracket which will generate an error, but take care
2379      not to try to access brastack[-1]. */      not to try to access brastack[-1] when computing the length and restoring
2380        the branch_extra value. */
2381    
2382      case ')':      case ')':
2383      length += 3;      length += 3;
2384        {        {
2385        int minval = 1;        int minval = 1;
2386        int maxval = 1;        int maxval = 1;
2387        int duplength = (brastackptr > 0)? length - brastack[--brastackptr] : 0;        int duplength;
2388    
2389          if (brastackptr > 0)
2390            {
2391            duplength = length - brastack[--brastackptr];
2392            branch_extra = bralenstack[brastackptr];
2393            }
2394          else duplength = 0;
2395    
2396        /* Leave ptr at the final char; for read_repeat_counts this happens        /* Leave ptr at the final char; for read_repeat_counts this happens
2397        automatically; for the others we need an increment. */        automatically; for the others we need an increment. */
2398    
2399        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2, &compile_block))
2400          {          {
2401          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr);          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr,
2402              &compile_block);
2403          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2404          }          }
2405        else if (c == '*') { minval = 0; maxval = -1; ptr++; }        else if (c == '*') { minval = 0; maxval = -1; ptr++; }
2406        else if (c == '+') { maxval = -1; ptr++; }        else if (c == '+') { maxval = -1; ptr++; }
2407        else if (c == '?') { minval = 0; ptr++; }        else if (c == '?') { minval = 0; ptr++; }
2408    
2409        /* If there is a minimum > 1 we have to replicate up to minval-1 times;        /* If the minimum is zero, we have to allow for an OP_BRAZERO before the
2410        if there is a limited maximum we have to replicate up to maxval-1 times        group, and if the maximum is greater than zero, we have to replicate
2411        and allow for a BRAZERO item before each optional copy, as we also have        maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting
2412        to do before the first copy if the minimum is zero. */        bracket set - hence the 7. */
2413    
2414        if (minval == 0) length++;        if (minval == 0)
2415          else if (minval > 1) length += (minval - 1) * duplength;          {
2416        if (maxval > minval) length += (maxval - minval) * (duplength + 1);          length++;
2417            if (maxval > 0) length += (maxval - 1) * (duplength + 7);
2418            }
2419    
2420          /* When the minimum is greater than zero, we have to replicate up to
2421          minval-1 times, with no additions required in the copies. Then, if
2422          there is a limited maximum we have to replicate up to maxval-1 times
2423          allowing for a BRAZERO item before each optional copy and nesting
2424          brackets for all but one of the optional copies. */
2425    
2426          else
2427            {
2428            length += (minval - 1) * duplength;
2429            if (maxval > minval)   /* Need this test as maxval=-1 means no limit */
2430              length += (maxval - minval) * (duplength + 7) - 6;
2431            }
2432        }        }
2433      continue;      continue;
2434    
# Line 1833  while ((c = *(++ptr)) != 0) Line 2443  while ((c = *(++ptr)) != 0)
2443      runlength = 0;      runlength = 0;
2444      do      do
2445        {        {
2446        if ((pcre_ctypes[c] & ctype_space) != 0)        if ((options & PCRE_EXTENDED) != 0)
         {  
         if ((options & PCRE_EXTENDED) != 0) continue;  
         spaces++;  
         }  
   
       if (c == '#' && (options & PCRE_EXTENDED) != 0)  
2447          {          {
2448          while ((c = *(++ptr)) != 0 && c != '\n');          if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2449          continue;          if (c == '#')
2450              {
2451              while ((c = *(++ptr)) != 0 && c != '\n');
2452              continue;
2453              }
2454          }          }
2455    
2456        /* Backslash may introduce a data char or a metacharacter; stop the        /* Backslash may introduce a data char or a metacharacter; stop the
# Line 1851  while ((c = *(++ptr)) != 0) Line 2459  while ((c = *(++ptr)) != 0)
2459        if (c == '\\')        if (c == '\\')
2460          {          {
2461          const uschar *saveptr = ptr;          const uschar *saveptr = ptr;
2462          c = check_escape(&ptr, errorptr, bracount, options, FALSE);          c = check_escape(&ptr, errorptr, bracount, options, FALSE,
2463              &compile_block);
2464          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2465          if (c < 0) { ptr = saveptr; break; }          if (c < 0) { ptr = saveptr; break; }
2466          }          }
# Line 1863  while ((c = *(++ptr)) != 0) Line 2472  while ((c = *(++ptr)) != 0)
2472    
2473      /* This "while" is the end of the "do" above. */      /* This "while" is the end of the "do" above. */
2474    
2475      while (runlength < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);      while (runlength < 255 &&
2476          (compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0);
2477    
2478      ptr--;      ptr--;
2479      length += runlength;      length += runlength;
# Line 1898  if (re == NULL) Line 2508  if (re == NULL)
2508    
2509  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
2510  re->options = options;  re->options = options;
2511    re->tables = tables;
2512    
2513  /* Set up a starting, non-extracting bracket, then compile the expression. On  /* Set up a starting, non-extracting bracket, then compile the expression. On
2514  error, *errorptr will be set non-NULL, so we don't need to look at the result  error, *errorptr will be set non-NULL, so we don't need to look at the result
# Line 1907  ptr = (const uschar *)pattern; Line 2518  ptr = (const uschar *)pattern;
2518  code = re->code;  code = re->code;
2519  *code = OP_BRA;  *code = OP_BRA;
2520  bracount = 0;  bracount = 0;
2521  (void)compile_regex(options, &bracount, &code, &ptr, errorptr);  (void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1,
2522      &compile_block);
2523  re->top_bracket = bracount;  re->top_bracket = bracount;
2524  re->top_backref = top_backref;  re->top_backref = top_backref;
2525    
# Line 1924  if debugging, leave the test till after Line 2536  if debugging, leave the test till after
2536  if (code - re->code > length) *errorptr = ERR23;  if (code - re->code > length) *errorptr = ERR23;
2537  #endif  #endif
2538    
2539    /* Give an error if there's a back reference to a non-existent capturing
2540    subpattern. */
2541    
2542    if (top_backref > re->top_bracket) *errorptr = ERR15;
2543    
2544  /* Failed to compile */  /* Failed to compile */
2545    
2546  if (*errorptr != NULL)  if (*errorptr != NULL)
# Line 1934  if (*errorptr != NULL) Line 2551  if (*errorptr != NULL)
2551    return NULL;    return NULL;
2552    }    }
2553    
2554  /* If the anchored option was not passed, set flag if we can determine that it  /* If the anchored option was not passed, set flag if we can determine that the
2555  is anchored by virtue of ^ characters or \A or anything else. Otherwise, see if  pattern is anchored by virtue of ^ characters or \A or anything else (such as
2556  we can determine what the first character has to be, because that speeds up  starting with .* when DOTALL is set).
2557  unanchored matches no end. In the case of multiline matches, an alternative is  
2558  to set the PCRE_STARTLINE flag if all branches start with ^. */  Otherwise, see if we can determine what the first character has to be, because
2559    that speeds up unanchored matches no end. If not, see if we can set the
2560    PCRE_STARTLINE flag. This is helpful for multiline matches when all branches
2561    start with ^, and also when all branches start with .* for non-DOTALL matches.
2562    */
2563    
2564  if ((options & PCRE_ANCHORED) == 0)  if ((options & PCRE_ANCHORED) == 0)
2565    {    {
2566    if (is_anchored(re->code, (options & PCRE_MULTILINE) != 0))    int temp_options = options;
2567      if (is_anchored(re->code, &temp_options))
2568      re->options |= PCRE_ANCHORED;      re->options |= PCRE_ANCHORED;
2569    else    else
2570      {      {
2571      int ch = find_firstchar(re->code);      int ch = find_firstchar(re->code, &temp_options);
2572      if (ch >= 0)      if (ch >= 0)
2573        {        {
2574        re->first_char = ch;        re->first_char = ch;
# Line 1961  if ((options & PCRE_ANCHORED) == 0) Line 2583  if ((options & PCRE_ANCHORED) == 0)
2583    
2584  #ifdef DEBUG  #ifdef DEBUG
2585    
2586  printf("Length = %d top_bracket = %d top_backref=%d\n",  printf("Length = %d top_bracket = %d top_backref = %d\n",
2587    length, re->top_bracket, re->top_backref);    length, re->top_bracket, re->top_backref);
2588    
2589  if (re->options != 0)  if (re->options != 0)
2590    {    {
2591    printf("%s%s%s%s%s%s%s\n",    printf("%s%s%s%s%s%s%s%s\n",
2592      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
2593      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
2594      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
2595      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
2596      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
2597      ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",      ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
2598      ((re->options & PCRE_EXTRA) != 0)? "extra " : "");      ((re->options & PCRE_EXTRA) != 0)? "extra " : "",
2599        ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");
2600    }    }
2601    
2602  if ((re->options & PCRE_FIRSTSET) != 0)  if ((re->options & PCRE_FIRSTSET) != 0)
# Line 1999  while (code < code_end) Line 2622  while (code < code_end)
2622    
2623    else switch(*code)    else switch(*code)
2624      {      {
2625        case OP_OPT:
2626        printf(" %.2x %s", code[1], OP_names[*code]);
2627        code++;
2628        break;
2629    
2630        case OP_COND:
2631        printf("%3d Cond", (code[1] << 8) + code[2]);
2632        code += 2;
2633        break;
2634    
2635        case OP_CREF:
2636        printf(" %.2d %s", code[1], OP_names[*code]);
2637        code++;
2638        break;
2639    
2640      case OP_CHARS:      case OP_CHARS:
2641      charlength = *(++code);      charlength = *(++code);
2642      printf("%3d ", charlength);      printf("%3d ", charlength);
# Line 2012  while (code < code_end) Line 2650  while (code < code_end)
2650      case OP_KET:      case OP_KET:
2651      case OP_ASSERT:      case OP_ASSERT:
2652      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
2653        case OP_ASSERTBACK:
2654        case OP_ASSERTBACK_NOT:
2655      case OP_ONCE:      case OP_ONCE:
2656      printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);      printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
2657      code += 2;      code += 2;
2658      break;      break;
2659    
2660        case OP_REVERSE:
2661        printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
2662        code += 2;
2663        break;
2664    
2665      case OP_STAR:      case OP_STAR:
2666      case OP_MINSTAR:      case OP_MINSTAR:
2667      case OP_PLUS:      case OP_PLUS:
# Line 2092  while (code < code_end) Line 2737  while (code < code_end)
2737      case OP_CLASS:      case OP_CLASS:
2738        {        {
2739        int i, min, max;        int i, min, max;
   
2740        code++;        code++;
2741        printf("    [");        printf("    [");
2742    
# Line 2176  return (pcre *)re; Line 2820  return (pcre *)re;
2820    
2821    
2822  /*************************************************  /*************************************************
 *        Match a character type                  *  
 *************************************************/  
   
 /* Not used in all the places it might be as it's sometimes faster  
 to put the code inline.  
   
 Arguments:  
   type        the character type  
   c           the character  
   dotall      the dotall flag  
   
 Returns:      TRUE if character is of the type  
 */  
   
 static BOOL  
 match_type(int type, int c, BOOL dotall)  
 {  
   
 #ifdef DEBUG  
 if (isprint(c)) printf("matching subject %c against ", c);  
   else printf("matching subject \\x%02x against ", c);  
 printf("%s\n", OP_names[type]);  
 #endif  
   
 switch(type)  
   {  
   case OP_ANY:            return dotall || c != '\n';  
   case OP_NOT_DIGIT:      return (pcre_ctypes[c] & ctype_digit) == 0;  
   case OP_DIGIT:          return (pcre_ctypes[c] & ctype_digit) != 0;  
   case OP_NOT_WHITESPACE: return (pcre_ctypes[c] & ctype_space) == 0;  
   case OP_WHITESPACE:     return (pcre_ctypes[c] & ctype_space) != 0;  
   case OP_NOT_WORDCHAR:   return (pcre_ctypes[c] & ctype_word) == 0;  
   case OP_WORDCHAR:       return (pcre_ctypes[c] & ctype_word) != 0;  
   }  
 return FALSE;  
 }  
   
   
   
 /*************************************************  
2823  *          Match a back-reference                *  *          Match a back-reference                *
2824  *************************************************/  *************************************************/
2825    
2826  /* If a back reference hasn't been set, the match fails.  /* If a back reference hasn't been set, the length that is passed is greater
2827    than the number of characters left in the string, so the match fails.
2828    
2829  Arguments:  Arguments:
2830    number      reference number    offset      index into the offset vector
2831    eptr        points into the subject    eptr        points into the subject
2832    length      length to be matched    length      length to be matched
2833    md          points to match data block    md          points to match data block
2834      ims         the ims flags
2835    
2836  Returns:      TRUE if matched  Returns:      TRUE if matched
2837  */  */
2838    
2839  static BOOL  static BOOL
2840  match_ref(int number, register const uschar *eptr, int length, match_data *md)  match_ref(int offset, register const uschar *eptr, int length, match_data *md,
2841      int ims)
2842  {  {
2843  const uschar *p = md->start_subject + md->offset_vector[number];  const uschar *p = md->start_subject + md->offset_vector[offset];
2844    
2845  #ifdef DEBUG  #ifdef DEBUG
2846  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 2250  printf("\n"); Line 2857  printf("\n");
2857    
2858  /* Always fail if not enough characters left */  /* Always fail if not enough characters left */
2859    
2860  if (length > md->end_subject - p) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
2861    
2863  /* Separate the caseless case for speed */  /* Separate the caseless case for speed */
2863    
2864  if (md->caseless)  if ((ims & PCRE_CASELESS) != 0)
2865    { while (length-- > 0) if (pcre_lcc[*p++] != pcre_lcc[*eptr++]) return FALSE; }    {
2866      while (length-- > 0)
2867        if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
2868      }
2869  else  else
2870    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
2871    
# Line 2268  return TRUE; Line 2878  return TRUE;
2878  *         Match from current position            *  *         Match from current position            *
2879  *************************************************/  *************************************************/
2880    
2881  /* On entry ecode points to the first opcode, and eptr to the first character.  /* On entry ecode points to the first opcode, and eptr to the first character
2882    in the subject string, while eptrb holds the value of eptr at the start of the
2883    last bracketed group - used for breaking infinite loops matching zero-length
2884    strings.
2885    
2886  Arguments:  Arguments:
2887     eptr        pointer in subject     eptr        pointer in subject
2888     ecode       position in code     ecode       position in code
2889     offset_top  current top pointer     offset_top  current top pointer
2890     md          pointer to "static" info for the match     md          pointer to "static" info for the match
2891       ims         current /i, /m, and /s options
2892       condassert  TRUE if called to check a condition assertion
2893       eptrb       eptr at start of last bracket
2894    
2895  Returns:       TRUE if matched  Returns:       TRUE if matched
2896  */  */
2897    
2898  static BOOL  static BOOL
2899  match(register const uschar *eptr, register const uschar *ecode, int offset_top,  match(register const uschar *eptr, register const uschar *ecode,
2900    match_data *md)    int offset_top, match_data *md, int ims, BOOL condassert, const uschar *eptrb)
2901  {  {
2902    int original_ims = ims;   /* Save for resetting on ')' */
2903    
2904  for (;;)  for (;;)
2905    {    {
2906      int op = (int)*ecode;
2907    int min, max, ctype;    int min, max, ctype;
2908    register int i;    register int i;
2909    register int c;    register int c;
2910    BOOL minimize = FALSE;    BOOL minimize = FALSE;
2911    
2912    /* Opening bracket. Check the alternative branches in turn, failing if none    /* Opening capturing bracket. If there is space in the offset vector, save
2913    match. We have to set the start offset if required and there is space    the current subject position in the working slot at the top of the vector. We
2914    in the offset vector so that it is available for subsequent back references    mustn't change the current values of the data slot, because they may be set
2915    if the bracket matches. However, if the bracket fails, we must put back the    from a previous iteration of this group, and be referred to by a reference
2916    previous value of both offsets in case they were set by a previous copy of    inside the group.
2917    the same bracket. Don't worry about setting the flag for the error case here;  
2918    that is handled in the code for KET. */    If the bracket fails to match, we need to restore this value and also the
2919      values of the final offsets, in case they were set by a previous iteration of
2920      the same bracket.
2921    
2922      If there isn't enough space in the offset vector, treat this as if it were a
2923      non-capturing bracket. Don't worry about setting the flag for the error case
2924      here; that is handled in the code for KET. */
2925    
2926    if ((int)*ecode >= OP_BRA)    if (op > OP_BRA)
2927      {      {
2928      int number = (*ecode - OP_BRA) << 1;      int number = op - OP_BRA;
2929      int save_offset1 = 0, save_offset2 = 0;      int offset = number << 1;
2930    
2931      DPRINTF(("start bracket %d\n", number/2));  #ifdef DEBUG
2932        printf("start bracket %d subject=", number);
2933        pchars(eptr, 16, TRUE, md);
2934        printf("\n");
2935    #endif
2936    
2937      if (number > 0 && number < md->offset_end)      if (offset < md->offset_max)
2938        {        {
2939        save_offset1 = md->offset_vector[number];        int save_offset1 = md->offset_vector[offset];
2940        save_offset2 = md->offset_vector[number+1];        int save_offset2 = md->offset_vector[offset+1];
2941        md->offset_vector[number] = eptr - md->start_subject;        int save_offset3 = md->offset_vector[md->offset_end - number];
2942    
2943          DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
2944          md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
2945    
2946          do
2947            {
2948            if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
2949            ecode += (ecode[1] << 8) + ecode[2];
2950            }
2951          while (*ecode == OP_ALT);
2952    
2953          DPRINTF(("bracket %d failed\n", number));
2954    
2955        DPRINTF(("saving %d %d\n", save_offset1, save_offset2));        md->offset_vector[offset] = save_offset1;
2956          md->offset_vector[offset+1] = save_offset2;
2957          md->offset_vector[md->offset_end - number] = save_offset3;
2958          return FALSE;
2959        }        }
2960    
2961      /* Recurse for all the alternatives. */      /* Insufficient room for saving captured contents */
2962    
2963        else op = OP_BRA;
2964        }
2965    
2966      /* Other types of node can be handled by a switch */
2967    
2968      switch(op)
2969        {
2970        case OP_BRA:     /* Non-capturing bracket: optimized */
2971        DPRINTF(("start bracket 0\n"));
2972      do      do
2973        {        {
2974        if (match(eptr, ecode+3, offset_top, md)) return TRUE;        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
2975        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
2976        }        }
2977      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
2978        DPRINTF(("bracket 0 failed\n"));
2979        return FALSE;
2980    
2981        /* Conditional group: compilation checked that there are no more than
2982        two branches. If the condition is false, skipping the first branch takes us
2983        past the end if there is only one branch, but that's OK because that is
2984        exactly what going to the ket would do. */
2985    
2986        case OP_COND:
2987        if (ecode[3] == OP_CREF)         /* Condition is extraction test */
2988          {
2989          int offset = ecode[4] << 1;    /* Doubled reference number */
2990          return match(eptr,
2991            ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)?
2992              5 : 3 + (ecode[1] << 8) + ecode[2]),
2993            offset_top, md, ims, FALSE, eptr);
2994          }
2995    
2996      DPRINTF(("bracket %d failed\n", number/2));      /* The condition is an assertion. Call match() to evaluate it - setting
2997        the final argument TRUE causes it to stop at the end of an assertion. */
2998    
2999      if (number > 0 && number < md->offset_end)      else
3000        {        {
3001        md->offset_vector[number] = save_offset1;        if (match(eptr, ecode+3, offset_top, md, ims, TRUE, NULL))
3002        md->offset_vector[number+1] = save_offset2;          {
3003            ecode += 3 + (ecode[4] << 8) + ecode[5];
3004            while (*ecode == OP_ALT) ecode += (ecode[1] << 8) + ecode[2];
3005            }
3006          else ecode += (ecode[1] << 8) + ecode[2];
3007          return match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr);
3008        }        }
3009        /* Control never reaches here */
3010    
3011      return FALSE;      /* Skip over conditional reference data if encountered (should not be) */
     }  
3012    
3013    /* Other types of node can be handled by a switch */      case OP_CREF:
3014        ecode += 2;
3015        break;
3016    
3017        /* End of the pattern */
3018    
   switch(*ecode)  
     {  
3019      case OP_END:      case OP_END:
3020      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;          /* Record where we ended */
3021      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;   /* and how many extracts were taken */
3022      return TRUE;      return TRUE;
3023    
3024      /* The equivalent of Prolog's "cut" - if the rest doesn't match, the      /* Change option settings */
     whole thing doesn't match, so we have to get out via a longjmp(). */  
3025    
3026      case OP_CUT:      case OP_OPT:
3027      if (match(eptr, ecode+1, offset_top, md)) return TRUE;      ims = ecode[1];
3028      longjmp(md->fail_env, 1);      ecode += 2;
3029        DPRINTF(("ims set to %02x\n", ims));
3030        break;
3031    
3032      /* Assertion brackets. Check the alternative branches in turn - the      /* Assertion brackets. Check the alternative branches in turn - the
3033      matching won't pass the KET for an assertion. If any one branch matches,      matching won't pass the KET for an assertion. If any one branch matches,
3034      the assertion is true. */      the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
3035        start of each branch to move the current point backwards, so the code at
3036        this level is identical to the lookahead case. */
3037    
3038      case OP_ASSERT:      case OP_ASSERT:
3039        case OP_ASSERTBACK:
3040      do      do
3041        {        {
3042        if (match(eptr, ecode+3, offset_top, md)) break;        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, NULL)) break;
3043        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
3044        }        }
3045      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
3046      if (*ecode == OP_KET) return FALSE;      if (*ecode == OP_KET) return FALSE;
3047    
3048        /* If checking an assertion for a condition, return TRUE. */
3049    
3050        if (condassert) return TRUE;
3051    
3052      /* Continue from after the assertion, updating the offsets high water      /* Continue from after the assertion, updating the offsets high water
3053      mark, since extracts may have been taken during the assertion. */      mark, since extracts may have been taken during the assertion. */
3054    
# Line 2374  for (;;) Line 3060  for (;;)
3060      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match */
3061    
3062      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
3063        case OP_ASSERTBACK_NOT:
3064      do      do
3065        {        {
3066        if (match(eptr, ecode+3, offset_top, md)) return FALSE;        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, NULL)) return FALSE;
3067        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
3068        }        }
3069      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
3070    
3071        if (condassert) return TRUE;
3072      ecode += 3;      ecode += 3;
3073      continue;      continue;
3074    
3075        /* Move the subject pointer back. This occurs only at the start of
3076        each branch of a lookbehind assertion. If we are too close to the start to
3077        move back, this match function fails. */
3078    
3079        case OP_REVERSE:
3080        eptr -= (ecode[1] << 8) + ecode[2];
3081        if (eptr < md->start_subject) return FALSE;
3082        ecode += 3;
3083        break;
3084    
3085    
3086      /* "Once" brackets are like assertion brackets except that after a match,      /* "Once" brackets are like assertion brackets except that after a match,
3087      the point in the subject string is not moved back. Thus there can never be      the point in the subject string is not moved back. Thus there can never be
3088      a move back into the brackets. Check the alternative branches in turn - the      a move back into the brackets. Check the alternative branches in turn - the
3089      matching won't pass the KET for this kind of subpattern. If any one branch      matching won't pass the KET for this kind of subpattern. If any one branch
3090      matches, we carry on, leaving the subject pointer. */      matches, we carry on as at the end of a normal bracket, leaving the subject
3091        pointer. */
3092    
3093      case OP_ONCE:      case OP_ONCE:
     do  
3094        {        {
3095        if (match(eptr, ecode+3, offset_top, md)) break;        const uschar *prev = ecode;
       ecode += (ecode[1] << 8) + ecode[2];  
       }  
     while (*ecode == OP_ALT);  
     if (*ecode == OP_KET) return FALSE;  
3096    
3097      /* Continue as from after the assertion, updating the offsets high water        do
3098      mark, since extracts may have been taken. */          {
3099            if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) break;
3100            ecode += (ecode[1] << 8) + ecode[2];
3101            }
3102          while (*ecode == OP_ALT);
3103    
3104      do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);        /* If hit the end of the group (which could be repeated), fail */
3105      ecode += 3;  
3106      offset_top = md->end_offset_top;        if (*ecode != OP_ONCE && *ecode != OP_ALT) return FALSE;
3107      eptr = md->end_match_ptr;  
3108      continue;        /* Continue as from after the assertion, updating the offsets high water
3109          mark, since extracts may have been taken. */
3110    
3111          do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
3112    
3113          offset_top = md->end_offset_top;
3114          eptr = md->end_match_ptr;
3115    
3116          /* For a non-repeating ket, just continue at this level. This also
3117          happens for a repeating ket if no characters were matched in the group.
3118          This is the forcible breaking of infinite loops as implemented in Perl
3119          5.005. If there is an options reset, it will get obeyed in the normal
3120          course of events. */
3121    
3122          if (*ecode == OP_KET || eptr == eptrb)
3123            {
3124            ecode += 3;
3125            break;
3126            }
3127    
3128          /* The repeating kets try the rest of the pattern or restart from the
3129          preceding bracket, in the appropriate order. We need to reset any options
3130          that changed within the bracket before re-running it, so check the next
3131          opcode. */
3132    
3133          if (ecode[3] == OP_OPT)
3134            {
3135            ims = (ims & ~PCRE_IMS) | ecode[4];
3136            DPRINTF(("ims set to %02x at group repeat\n", ims));
3137            }
3138    
3139          if (*ecode == OP_KETRMIN)
3140            {
3141            if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr) ||
3142                match(eptr, prev, offset_top, md, ims, FALSE, eptr)) return TRUE;
3143            }
3144          else  /* OP_KETRMAX */
3145            {
3146            if (match(eptr, prev, offset_top, md, ims, FALSE, eptr) ||
3147                match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3148            }
3149          }
3150        return FALSE;
3151    
3152      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
3153      bracketed group and go to there. */      bracketed group and go to there. */
# Line 2423  for (;;) Line 3165  for (;;)
3165      case OP_BRAZERO:      case OP_BRAZERO:
3166        {        {
3167        const uschar *next = ecode+1;        const uschar *next = ecode+1;
3168        if (match(eptr, next, offset_top, md)) return TRUE;        if (match(eptr, next, offset_top, md, ims, FALSE, eptr)) return TRUE;
3169        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3170        ecode = next + 3;        ecode = next + 3;
3171        }        }
# Line 2433  for (;;) Line 3175  for (;;)
3175        {        {
3176        const uschar *next = ecode+1;        const uschar *next = ecode+1;
3177        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3178        if (match(eptr, next+3, offset_top, md)) return TRUE;        if (match(eptr, next+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3179        ecode++;        ecode++;
3180        }        }
3181      break;;      break;
3182    
3183      /* End of a group, repeated or non-repeating. If we are at the end of      /* End of a group, repeated or non-repeating. If we are at the end of
3184      an assertion "group", stop matching and return TRUE, but record the      an assertion "group", stop matching and return TRUE, but record the
3185      current high water mark for use by positive assertions. */      current high water mark for use by positive assertions. Do this also
3186        for the "once" (non-backing-up) groups. */
3187    
3188      case OP_KET:      case OP_KET:
3189      case OP_KETRMIN:      case OP_KETRMIN:
3190      case OP_KETRMAX:      case OP_KETRMAX:
3191        {        {
       int number;  
3192        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
3193    
3194        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
3195              *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
3196              *prev == OP_ONCE)
3197          {          {
3198          md->end_match_ptr = eptr;      /* For ONCE */          md->end_match_ptr = eptr;      /* For ONCE */
3199          md->end_offset_top = offset_top;          md->end_offset_top = offset_top;
3200          return TRUE;          return TRUE;
3201          }          }
3202    
3203        /* In all other cases we have to check the group number back at the        /* In all other cases except a conditional group we have to check the
3204        start and if necessary complete handling an extraction by setting the        group number back at the start and if necessary complete handling an
3205        final offset and bumping the high water mark. */        extraction by setting the offsets and bumping the high water mark. */
3206    
3207        number = (*prev - OP_BRA) << 1;        if (*prev != OP_COND)
3208            {
3209            int number = *prev - OP_BRA;
3210            int offset = number << 1;
3211    
3212        DPRINTF(("end bracket %d\n", number/2));          DPRINTF(("end bracket %d\n", number));
3213    
3214        if (number > 0)          if (number > 0)
         {  
         if (number >= md->offset_end) md->offset_overflow = TRUE; else  
3215            {            {
3216            md->offset_vector[number+1] = eptr - md->start_subject;            if (offset >= md->offset_max) md->offset_overflow = TRUE; else
3217            if (offset_top <= number) offset_top = number + 2;              {
3218                md->offset_vector[offset] =
3219                  md->offset_vector[md->offset_end - number];
3220                md->offset_vector[offset+1] = eptr - md->start_subject;
3221                if (offset_top <= offset) offset_top = offset + 2;
3222                }
3223            }            }
3224          }          }
3225    
3226        /* For a non-repeating ket, just advance to the next node and continue at        /* Reset the value of the ims flags, in case they got changed during
3227        this level. */        the group. */
3228    
3229        if (*ecode == OP_KET)        ims = original_ims;
3230          DPRINTF(("ims reset to %02x\n", ims));
3231    
3232          /* For a non-repeating ket, just continue at this level. This also
3233          happens for a repeating ket if no characters were matched in the group.
3234          This is the forcible breaking of infinite loops as implemented in Perl
3235          5.005. If there is an options reset, it will get obeyed in the normal
3236          course of events. */
3237    
3238          if (*ecode == OP_KET || eptr == eptrb)
3239          {          {
3240          ecode += 3;          ecode += 3;
3241          break;          break;
# Line 2487  for (;;) Line 3246  for (;;)
3246    
3247        if (*ecode == OP_KETRMIN)        if (*ecode == OP_KETRMIN)
3248          {          {
3249          if (match(eptr, ecode+3, offset_top, md) ||          if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr) ||
3250              match(eptr, prev, offset_top, md)) return TRUE;              match(eptr, prev, offset_top, md, ims, FALSE, eptr)) return TRUE;
3251          }          }
3252        else  /* OP_KETRMAX */        else  /* OP_KETRMAX */
3253          {          {
3254          if (match(eptr, prev, offset_top, md) ||          if (match(eptr, prev, offset_top, md, ims, FALSE, eptr) ||
3255              match(eptr, ecode+3, offset_top, md)) return TRUE;              match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3256          }          }
3257        }        }
3258      return FALSE;      return FALSE;
# Line 2502  for (;;) Line 3261  for (;;)
3261    
3262      case OP_CIRC:      case OP_CIRC:
3263      if (md->notbol && eptr == md->start_subject) return FALSE;      if (md->notbol && eptr == md->start_subject) return FALSE;
3264      if (md->multiline)      if ((ims & PCRE_MULTILINE) != 0)
3265        {        {
3266        if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;        if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;
3267        ecode++;        ecode++;
# Line 2517  for (;;) Line 3276  for (;;)
3276      ecode++;      ecode++;
3277      break;      break;
3278    
3279      /* Assert before internal newline if multiline, or before      /* Assert before internal newline if multiline, or before a terminating
3280      a terminating newline unless endonly is set, else end of subject unless      newline unless endonly is set, else end of subject unless noteol is set. */
     noteol is set. */  
3281    
3282      case OP_DOLL:      case OP_DOLL:
3283      if (md->noteol && eptr >= md->end_subject) return FALSE;      if ((ims & PCRE_MULTILINE) != 0)
     if (md->multiline)  
3284        {        {
3285        if (eptr < md->end_subject && *eptr != '\n') return FALSE;        if (eptr < md->end_subject) { if (*eptr != '\n') return FALSE; }
3286            else { if (md->noteol) return FALSE; }
3287        ecode++;        ecode++;
3288        break;        break;
3289        }        }
3290      else if (!md->endonly)      else
3291        {        {
3292        if (eptr < md->end_subject - 1 ||        if (md->noteol) return FALSE;
3293           (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;        if (!md->endonly)
3294        ecode++;          {
3295        break;          if (eptr < md->end_subject - 1 ||
3296               (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
3297    
3298            ecode++;
3299            break;
3300            }
3301        }        }
3302      /* ... else fall through */      /* ... else fall through */
3303    
3304      /* End of subject assertion */      /* End of subject assertion (\z) */
3305    
3306      case OP_EOD:      case OP_EOD:
3307      if (eptr < md->end_subject) return FALSE;      if (eptr < md->end_subject) return FALSE;
3308      ecode++;      ecode++;
3309      break;      break;
3310    
3311        /* End of subject or ending \n assertion (\Z) */
3312    
3313        case OP_EODN:
3314        if (eptr < md->end_subject - 1 ||
3315           (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
3316        ecode++;
3317        break;
3318    
3319      /* Word boundary assertions */      /* Word boundary assertions */
3320    
3321      case OP_NOT_WORD_BOUNDARY:      case OP_NOT_WORD_BOUNDARY:
3322      case OP_WORD_BOUNDARY:      case OP_WORD_BOUNDARY:
3323        {        {
3324        BOOL prev_is_word = (eptr != md->start_subject) &&        BOOL prev_is_word = (eptr != md->start_subject) &&
3325          ((pcre_ctypes[eptr[-1]] & ctype_word) != 0);          ((md->ctypes[eptr[-1]] & ctype_word) != 0);
3326        BOOL cur_is_word = (eptr < md->end_subject) &&        BOOL cur_is_word = (eptr < md->end_subject) &&
3327          ((pcre_ctypes[*eptr] & ctype_word) != 0);          ((md->ctypes[*eptr] & ctype_word) != 0);
3328        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
3329             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
3330          return FALSE;          return FALSE;
# Line 2563  for (;;) Line 3334  for (;;)
3334      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
3335    
3336      case OP_ANY:      case OP_ANY:
3337      if (!md->dotall && eptr < md->end_subject && *eptr == '\n') return FALSE;      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n')
3338          return FALSE;
3339      if (eptr++ >= md->end_subject) return FALSE;      if (eptr++ >= md->end_subject) return FALSE;
3340      ecode++;      ecode++;
3341      break;      break;
3342    
3343      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
3344      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) != 0)      if (eptr >= md->end_subject ||
3345           (md->ctypes[*eptr++] & ctype_digit) != 0)
3346        return FALSE;        return FALSE;
3347      ecode++;      ecode++;
3348      break;      break;
3349    
3350      case OP_DIGIT:      case OP_DIGIT:
3351      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) == 0)      if (eptr >= md->end_subject ||
3352           (md->ctypes[*eptr++] & ctype_digit) == 0)
3353        return FALSE;        return FALSE;
3354      ecode++;      ecode++;
3355      break;      break;
3356    
3357      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
3358      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) != 0)      if (eptr >= md->end_subject ||
3359           (md->ctypes[*eptr++] & ctype_space) != 0)
3360        return FALSE;        return FALSE;
3361      ecode++;      ecode++;
3362      break;      break;
3363    
3364      case OP_WHITESPACE:      case OP_WHITESPACE:
3365      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) == 0)      if (eptr >= md->end_subject ||
3366           (md->ctypes[*eptr++] & ctype_space) == 0)
3367        return FALSE;        return FALSE;
3368      ecode++;      ecode++;
3369      break;      break;
3370    
3371      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
3372      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) != 0)      if (eptr >= md->end_subject ||
3373           (md->ctypes[*eptr++] & ctype_word) != 0)
3374        return FALSE;        return FALSE;
3375      ecode++;      ecode++;
3376      break;      break;
3377    
3378      case OP_WORDCHAR:      case OP_WORDCHAR:
3379      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) == 0)      if (eptr >= md->end_subject ||
3380           (md->ctypes[*eptr++] & ctype_word) == 0)
3381        return FALSE;        return FALSE;
3382      ecode++;      ecode++;
3383      break;      break;
# Line 2615  for (;;) Line 3393  for (;;)
3393      case OP_REF:      case OP_REF:
3394        {        {
3395        int length;        int length;
3396        int number = ecode[1] << 1;                /* Doubled reference number */        int offset = ecode[1] << 1;                /* Doubled reference number */
3397        ecode += 2;                                /* Advance past the item */        ecode += 2;                                /* Advance past the item */
3398    
3399        if (number >= offset_top || md->offset_vector[number] < 0)        /* If the reference is unset, set the length to be longer than the amount
3400          {        of subject left; this ensures that every attempt at a match fails. We
3401          md->errorcode = PCRE_ERROR_BADREF;        can't just fail here, because of the possibility of quantifiers with zero
3402          return FALSE;        minima. */
3403          }  
3404          length = (offset >= offset_top || md->offset_vector[offset] < 0)?
3405            md->end_subject - eptr + 1 :
3406            md->offset_vector[offset+1] - md->offset_vector[offset];
3407    
3408        length = md->offset_vector[number+1] - md->offset_vector[number];        /* Set up for repetition, or handle the non-repeated case */
3409    
3410        switch (*ecode)        switch (*ecode)
3411          {          {
# Line 2651  for (;;) Line 3432  for (;;)
3432          break;          break;
3433    
3434          default:               /* No repeat follows */          default:               /* No repeat follows */
3435          if (!match_ref(number, eptr, length, md)) return FALSE;          if (!match_ref(offset, eptr, length, md, ims)) return FALSE;
3436          eptr += length;          eptr += length;
3437          continue;              /* With the main loop */          continue;              /* With the main loop */
3438          }          }
# Line 2667  for (;;) Line 3448  for (;;)
3448    
3449        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3450          {          {
3451          if (!match_ref(number, eptr, length, md)) return FALSE;          if (!match_ref(offset, eptr, length, md, ims)) return FALSE;
3452          eptr += length;          eptr += length;
3453          }          }
3454    
# Line 2682  for (;;) Line 3463  for (;;)
3463          {          {
3464          for (i = min;; i++)          for (i = min;; i++)
3465            {            {
3466            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3467            if (i >= max || !match_ref(number, eptr, length, md))              return TRUE;
3468              if (i >= max || !match_ref(offset, eptr, length, md, ims))
3469              return FALSE;              return FALSE;
3470            eptr += length;            eptr += length;
3471            }            }
# Line 2697  for (;;) Line 3479  for (;;)
3479          const uschar *pp = eptr;          const uschar *pp = eptr;
3480          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3481            {            {
3482            if (!match_ref(number, eptr, length, md)) break;            if (!match_ref(offset, eptr, length, md, ims)) break;
3483            eptr += length;            eptr += length;
3484            }            }
3485          while (eptr >= pp)          while (eptr >= pp)
3486            {            {
3487            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3488                return TRUE;
3489            eptr -= length;            eptr -= length;
3490            }            }
3491          return FALSE;          return FALSE;
# Line 2710  for (;;) Line 3493  for (;;)
3493        }        }
3494      /* Control never gets here */      /* Control never gets here */
3495    
3496    
3497    
3498      /* Match a character class, possibly repeatedly. Look past the end of the      /* Match a character class, possibly repeatedly. Look past the end of the
3499      item to see if there is repeat information following. Then obey similar      item to see if there is repeat information following. Then obey similar
3500      code to character type repeats - written out again for speed. If caseless      code to character type repeats - written out again for speed. */
     matching was set at runtime but not at compile time, we have to check both  
     versions of a character. */  
3501    
3502      case OP_CLASS:      case OP_CLASS:
3503        {        {
# Line 2746  for (;;) Line 3529  for (;;)
3529          break;          break;
3530    
3531          default:               /* No repeat follows */          default:               /* No repeat follows */
3532          if (eptr >= md->end_subject) return FALSE;          min = max = 1;
3533          c = *eptr++;          break;
         if ((data[c/8] & (1 << (c&7))) != 0) continue;    /* With main loop */  
         if (md->runtime_caseless)  
           {  
           c = pcre_fcc[c];  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  /* With main loop */  
           }  
         return FALSE;  
3534          }          }
3535    
3536        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
# Line 2764  for (;;) Line 3540  for (;;)
3540          if (eptr >= md->end_subject) return FALSE;          if (eptr >= md->end_subject) return FALSE;
3541          c = *eptr++;          c = *eptr++;
3542          if ((data[c/8] & (1 << (c&7))) != 0) continue;          if ((data[c/8] & (1 << (c&7))) != 0) continue;
         if (md->runtime_caseless)  
           {  
           c = pcre_fcc[c];  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  
           }  
3543          return FALSE;          return FALSE;
3544          }          }
3545    
# Line 2784  for (;;) Line 3555  for (;;)
3555          {          {
3556          for (i = min;; i++)          for (i = min;; i++)
3557            {            {
3558            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3559                return TRUE;
3560            if (i >= max || eptr >= md->end_subject) return FALSE;            if (i >= max || eptr >= md->end_subject) return FALSE;
3561            c = *eptr++;            c = *eptr++;
3562            if ((data[c/8] & (1 << (c&7))) != 0) continue;            if ((data[c/8] & (1 << (c&7))) != 0) continue;
           if (md->runtime_caseless)  
             {  
             c = pcre_fcc[c];  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             }  
3563            return FALSE;            return FALSE;
3564            }            }
3565          /* Control never gets here */          /* Control never gets here */
# Line 2808  for (;;) Line 3575  for (;;)
3575            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3576            c = *eptr;            c = *eptr;
3577            if ((data[c/8] & (1 << (c&7))) != 0) continue;            if ((data[c/8] & (1 << (c&7))) != 0) continue;
           if (md->runtime_caseless)  
             {  
             c = pcre_fcc[c];  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             }  
3578            break;            break;
3579            }            }
3580    
3581          while (eptr >= pp)          while (eptr >= pp)
3582            if (match(eptr--, ecode, offset_top, md)) return TRUE;            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3583                return TRUE;
3584          return FALSE;          return FALSE;
3585          }          }
3586        }        }
# Line 2844  for (;;) Line 3607  for (;;)
3607  #endif  #endif
3608    
3609        if (length > md->end_subject - eptr) return FALSE;        if (length > md->end_subject - eptr) return FALSE;
3610        if (md->caseless)        if ((ims & PCRE_CASELESS) != 0)
3611          {          {
3612          while (length-- > 0) if (pcre_lcc[*ecode++] != pcre_lcc[*eptr++]) return FALSE;          while (length-- > 0)
3613              if (md->lcc[*ecode++] != md->lcc[*eptr++])
3614                return FALSE;
3615          }          }
3616        else        else
3617          {          {
# Line 2901  for (;;) Line 3666  for (;;)
3666      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
3667        max, eptr));        max, eptr));
3668    
3669      if (md->caseless)      if ((ims & PCRE_CASELESS) != 0)
3670        {        {
3671        c = pcre_lcc[c];        c = md->lcc[c];
3672        for (i = 1; i <= min; i++) if (c != pcre_lcc[*eptr++]) return FALSE;        for (i = 1; i <= min; i++)
3673            if (c != md->lcc[*eptr++]) return FALSE;
3674        if (min == max) continue;        if (min == max) continue;
3675        if (minimize)        if (minimize)
3676          {          {
3677          for (i = min;; i++)          for (i = min;; i++)
3678            {            {
3679            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3680            if (i >= max || eptr >= md->end_subject || c != pcre_lcc[*eptr++])              return TRUE;
3681              if (i >= max || eptr >= md->end_subject ||
3682                  c != md->lcc[*eptr++])
3683              return FALSE;              return FALSE;
3684            }            }
3685          /* Control never gets here */          /* Control never gets here */
# Line 2921  for (;;) Line 3689  for (;;)
3689          const uschar *pp = eptr;          const uschar *pp = eptr;
3690          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3691            {            {
3692            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c != md->lcc[*eptr]) break;
3693            eptr++;            eptr++;
3694            }            }
3695          while (eptr >= pp)          while (eptr >= pp)
3696            if (match(eptr--, ecode, offset_top, md)) return TRUE;            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3697                return TRUE;
3698          return FALSE;          return FALSE;
3699          }          }
3700        /* Control never gets here */        /* Control never gets here */
# Line 2941  for (;;) Line 3710  for (;;)
3710          {          {
3711          for (i = min;; i++)          for (i = min;; i++)
3712            {            {
3713            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3714                return TRUE;
3715            if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;            if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;
3716            }            }
3717          /* Control never gets here */          /* Control never gets here */
# Line 2955  for (;;) Line 3725  for (;;)
3725            eptr++;            eptr++;
3726            }            }
3727          while (eptr >= pp)          while (eptr >= pp)
3728           if (match(eptr--, ecode, offset_top, md)) return TRUE;           if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3729               return TRUE;
3730          return FALSE;          return FALSE;
3731          }          }
3732        }        }
# Line 2966  for (;;) Line 3737  for (;;)
3737      case OP_NOT:      case OP_NOT:
3738      if (eptr >= md->end_subject) return FALSE;      if (eptr >= md->end_subject) return FALSE;
3739      ecode++;      ecode++;
3740      if (md->caseless)      if ((ims & PCRE_CASELESS) != 0)
3741        {        {
3742        if (pcre_lcc[*ecode++] == pcre_lcc[*eptr++]) return FALSE;        if (md->lcc[*ecode++] == md->lcc[*eptr++]) return FALSE;
3743        }        }
3744      else      else
3745        {        {
# Line 3026  for (;;) Line 3797  for (;;)
3797      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
3798        max, eptr));        max, eptr));
3799    
3800      if (md->caseless)      if ((ims & PCRE_CASELESS) != 0)
3801        {        {
3802        c = pcre_lcc[c];        c = md->lcc[c];
3803        for (i = 1; i <= min; i++) if (c == pcre_lcc[*eptr++]) return FALSE;        for (i = 1; i <= min; i++)
3804            if (c == md->lcc[*eptr++]) return FALSE;
3805        if (min == max) continue;        if (min == max) continue;
3806        if (minimize)        if (minimize)
3807          {          {
3808          for (i = min;; i++)          for (i = min;; i++)
3809            {            {
3810            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3811            if (i >= max || eptr >= md->end_subject || c == pcre_lcc[*eptr++])              return TRUE;
3812              if (i >= max || eptr >= md->end_subject ||
3813                  c == md->lcc[*eptr++])
3814              return FALSE;              return FALSE;
3815            }            }
3816          /* Control never gets here */          /* Control never gets here */
# Line 3046  for (;;) Line 3820  for (;;)
3820          const uschar *pp = eptr;          const uschar *pp = eptr;
3821          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3822            {            {
3823            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c == md->lcc[*eptr]) break;
3824            eptr++;            eptr++;
3825            }            }
3826          while (eptr >= pp)          while (eptr >= pp)
3827            if (match(eptr--, ecode, offset_top, md)) return TRUE;            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3828                return TRUE;
3829          return FALSE;          return FALSE;
3830          }          }
3831        /* Control never gets here */        /* Control never gets here */
# Line 3066  for (;;) Line 3841  for (;;)
3841          {          {
3842          for (i = min;; i++)          for (i = min;; i++)
3843            {            {
3844            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3845                return TRUE;
3846            if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;            if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;
3847            }            }
3848          /* Control never gets here */          /* Control never gets here */
# Line 3080  for (;;) Line 3856  for (;;)
3856            eptr++;            eptr++;
3857            }            }
3858          while (eptr >= pp)          while (eptr >= pp)
3859           if (match(eptr--, ecode, offset_top, md)) return TRUE;           if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3860               return TRUE;
3861          return FALSE;          return FALSE;
3862          }          }
3863        }        }
# Line 3130  for (;;) Line 3907  for (;;)
3907      if (min > 0) switch(ctype)      if (min > 0) switch(ctype)
3908        {        {
3909        case OP_ANY:        case OP_ANY:
3910        if (!md->dotall)        if ((ims & PCRE_DOTALL) == 0)
3911          { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }          { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }
3912        else eptr += min;        else eptr += min;
3913        break;        break;
3914    
3915        case OP_NOT_DIGIT:        case OP_NOT_DIGIT:
3916        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3917          if ((pcre_ctypes[*eptr++] & ctype_digit) != 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_digit) != 0) return FALSE;
3918        break;        break;
3919    
3920        case OP_DIGIT:        case OP_DIGIT:
3921        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3922          if ((pcre_ctypes[*eptr++] & ctype_digit) == 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_digit) == 0) return FALSE;
3923        break;        break;
3924    
3925        case OP_NOT_WHITESPACE:        case OP_NOT_WHITESPACE:
3926        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3927          if ((pcre_ctypes[*eptr++] & ctype_space) != 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_space) != 0) return FALSE;
3928        break;        break;
3929    
3930        case OP_WHITESPACE:        case OP_WHITESPACE:
3931        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3932          if ((pcre_ctypes[*eptr++] & ctype_space) == 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_space) == 0) return FALSE;
3933        break;        break;
3934    
3935        case OP_NOT_WORDCHAR:        case OP_NOT_WORDCHAR:
3936        for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) != 0)        for (i = 1; i <= min; i++)
3937          return FALSE;          if ((md->ctypes[*eptr++] & ctype_word) != 0)
3938              return FALSE;
3939        break;        break;
3940    
3941        case OP_WORDCHAR:        case OP_WORDCHAR:
3942        for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) == 0)        for (i = 1; i <= min; i++)
3943          return FALSE;          if ((md->ctypes[*eptr++] & ctype_word) == 0)
3944              return FALSE;
3945        break;        break;
3946        }        }
3947    
# Line 3171  for (;;) Line 3950  for (;;)
3950      if (min == max) continue;      if (min == max) continue;
3951    
3952      /* If minimizing, we have to test the rest of the pattern before each      /* If minimizing, we have to test the rest of the pattern before each
3953      subsequent match, so inlining isn't much help; just use the function. */      subsequent match. */
3954    
3955      if (minimize)      if (minimize)
3956        {        {
3957        for (i = min;; i++)        for (i = min;; i++)
3958          {          {
3959          if (match(eptr, ecode, offset_top, md)) return TRUE;          if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE;
3960          if (i >= max || eptr >= md->end_subject ||          if (i >= max || eptr >= md->end_subject) return FALSE;
3961            !match_type(ctype, *eptr++, md->dotall))  
3962              return FALSE;          c = *eptr++;
3963            switch(ctype)
3964              {
3965              case OP_ANY:
3966              if ((ims & PCRE_DOTALL) == 0 && c == '\n') return FALSE;
3967              break;
3968    
3969              case OP_NOT_DIGIT:
3970              if ((md->ctypes[c] & ctype_digit) != 0) return FALSE;
3971              break;
3972    
3973              case OP_DIGIT:
3974              if ((md->ctypes[c] & ctype_digit) == 0) return FALSE;
3975              break;
3976    
3977              case OP_NOT_WHITESPACE:
3978              if ((md->ctypes[c] & ctype_space) != 0) return FALSE;
3979              break;
3980    
3981              case OP_WHITESPACE:
3982              if  ((md->ctypes[c] & ctype_space) == 0) return FALSE;
3983              break;
3984    
3985              case OP_NOT_WORDCHAR:
3986              if ((md->ctypes[c] & ctype_word) != 0) return FALSE;
3987              break;
3988    
3989              case OP_WORDCHAR:
3990              if ((md->ctypes[c] & ctype_word) == 0) return FALSE;
3991              break;
3992              }
3993          }          }
3994        /* Control never gets here */        /* Control never gets here */
3995        }        }
# Line 3194  for (;;) Line 4003  for (;;)
4003        switch(ctype)        switch(ctype)
4004          {          {
4005          case OP_ANY:          case OP_ANY:
4006          if (!md->dotall)          if ((ims & PCRE_DOTALL) == 0)
4007            {            {
4008            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4009              {              {
# Line 3213  for (;;) Line 4022  for (;;)
4022          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
4023          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4024            {            {
4025            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
4026              break;              break;
4027            eptr++;            eptr++;
4028            }            }
# Line 3222  for (;;) Line 4031  for (;;)
4031          case OP_DIGIT:          case OP_DIGIT:
4032          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4033            {            {
4034            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
4035              break;              break;
4036            eptr++;            eptr++;
4037            }            }
# Line 3231  for (;;) Line 4040  for (;;)
4040          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
4041          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4042            {            {
4043            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
4044              break;              break;
4045            eptr++;            eptr++;
4046            }            }
# Line 3240  for (;;) Line 4049  for (;;)
4049          case OP_WHITESPACE:          case OP_WHITESPACE:
4050          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4051            {            {
4052            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
4053              break;              break;
4054            eptr++;            eptr++;
4055            }            }
# Line 3249  for (;;) Line 4058  for (;;)
4058          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
4059          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4060            {            {
4061            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
4062              break;              break;
4063            eptr++;            eptr++;
4064            }            }
# Line 3258  for (;;) Line 4067  for (;;)
4067          case OP_WORDCHAR:          case OP_WORDCHAR:
4068          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4069            {            {
4070            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
4071              break;              break;
4072            eptr++;            eptr++;
4073            }            }
# Line 3266  for (;;) Line 4075  for (;;)
4075          }          }
4076    
4077        while (eptr >= pp)        while (eptr >= pp)
4078          if (match(eptr--, ecode, offset_top, md)) return TRUE;          if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
4079              return TRUE;
4080        return FALSE;        return FALSE;
4081        }        }
4082      /* Control never gets here */      /* Control never gets here */
# Line 3289  for (;;) Line 4099  for (;;)
4099    
4100    
4101    
 /*************************************************  
 *         Segregate setjmp()                     *  
 *************************************************/  
   
 /* The -Wall option of gcc gives warnings for all local variables when setjmp()  
 is used, even if the coding conforms to the rules of ANSI C. To avoid this, we  
 hide it in a separate function. This is called only when PCRE_EXTRA is set,  
 since it's needed only for the extension \X option, and with any luck, a good  
 compiler will spot the tail recursion and compile it efficiently.  
   
 Arguments:  
    eptr        pointer in subject  
    ecode       position in code  
    offset_top  current top pointer  
    md          pointer to "static" info for the match  
   
 Returns:       TRUE if matched  
 */  
   
 static BOOL  
 match_with_setjmp(const uschar *eptr, const uschar *ecode, int offset_top,  
   match_data *match_block)  
 {  
 return setjmp(match_block->fail_env) == 0 &&  
       match(eptr, ecode, offset_top, match_block);  
 }  
   
   
4102    
4103  /*************************************************  /*************************************************
4104  *         Execute a Regular Expression           *  *         Execute a Regular Expression           *
# Line 3347  pcre_exec(const pcre *external_re, const Line 4129  pcre_exec(const pcre *external_re, const
4129  {  {
4130  int resetcount, ocount;  int resetcount, ocount;
4131  int first_char = -1;  int first_char = -1;
4132    int ims = 0;
4133  match_data match_block;  match_data match_block;
4134  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
4135  const uschar *start_match = (const uschar *)subject;  const uschar *start_match = (const uschar *)subject;
# Line 3367  match_block.start_subject = (const uscha Line 4150  match_block.start_subject = (const uscha
4150  match_block.end_subject = match_block.start_subject + length;  match_block.end_subject = match_block.start_subject + length;
4151  end_subject = match_block.end_subject;  end_subject = match_block.end_subject;
4152    
4153  match_block.caseless  = ((re->options | options) & PCRE_CASELESS) != 0;  match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
 match_block.runtime_caseless = match_block.caseless &&  
   (re->options & PCRE_CASELESS) == 0;  
   
 match_block.multiline = ((re->options | options) & PCRE_MULTILINE) != 0;  
 match_block.dotall    = ((re->options | options) & PCRE_DOTALL) != 0;  
 match_block.endonly   = ((re->options | options) & PCRE_DOLLAR_ENDONLY) != 0;  
4154    
4155  match_block.notbol = (options & PCRE_NOTBOL) != 0;  match_block.notbol = (options & PCRE_NOTBOL) != 0;
4156  match_block.noteol = (options & PCRE_NOTEOL) != 0;  match_block.noteol = (options & PCRE_NOTEOL) != 0;
4157    
4158  match_block.errorcode = PCRE_ERROR_NOMATCH;     /* Default error */  match_block.errorcode = PCRE_ERROR_NOMATCH;     /* Default error */
4159    
4160    match_block.lcc = re->tables + lcc_offset;
4161    match_block.ctypes = re->tables + ctypes_offset;
4162    
4163    /* The ims options can vary during the matching as a result of the presence
4164    of (?ims) items in the pattern. They are kept in a local variable so that
4165    restoring at the exit of a group is easy. */
4166    
4167    ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
4168    
4169  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can
4170  hold, we get a temporary bit of working store to use during the matching.  hold, we get a temporary bit of working store to use during the matching.
4171  Otherwise, we can use the vector supplied, rounding down its size to a multiple  Otherwise, we can use the vector supplied, rounding down its size to a multiple
4172  of 2. */  of 3. */
4173    
4174  ocount = offsetcount & (-2);  ocount = offsetcount - (offsetcount % 3);
4175  if (re->top_backref > 0 && re->top_backref >= ocount/2)  
4176    if (re->top_backref > 0 && re->top_backref >= ocount/3)
4177    {    {
4178    ocount = re->top_backref * 2 + 2;    ocount = re->top_backref * 3 + 3;
4179    match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
4180    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
4181    using_temporary_offsets = TRUE;    using_temporary_offsets = TRUE;
4182    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
# Line 3397  if (re->top_backref > 0 && re->top_backr Line 4184  if (re->top_backref > 0 && re->top_backr
4184  else match_block.offset_vector = offsets;  else match_block.offset_vector = offsets;
4185    
4186  match_block.offset_end = ocount;  match_block.offset_end = ocount;
4187    match_block.offset_max = (2*ocount)/3;
4188  match_block.offset_overflow = FALSE;  match_block.offset_overflow = FALSE;
4189    
4190  /* Compute the minimum number of offsets that we need to reset each time. Doing  /* Compute the minimum number of offsets that we need to reset each time. Doing
# Line 3406  in the pattern. */ Line 4194  in the pattern. */
4194  resetcount = 2 + re->top_bracket * 2;  resetcount = 2 + re->top_bracket * 2;
4195  if (resetcount > offsetcount) resetcount = ocount;  if (resetcount > offsetcount) resetcount = ocount;
4196    
4197  /* If MULTILINE is set at exec time but was not set at compile time, and the  /* Reset the working variable associated with each extraction. These should
4198  anchored flag is set, we must re-check because a setting provoked by ^ in the  never be used unless previously set, but they get saved and restored, and so we
4199  pattern is not right in multi-line mode. Calling is_anchored() again here does  initialize them to avoid reading uninitialized locations. */
 the right check, because multiline is now set. If it now yields FALSE, the  
 expression must have had ^ starting some of its branches. Check to see if  
 that is true for *all* branches, and if so, set the startline flag. */  
4200    
4201  if (match_block. multiline && anchored && (re->options & PCRE_MULTILINE) == 0 &&  if (match_block.offset_vector != NULL)
     !is_anchored(re->code, match_block.multiline))  
4202    {    {
4203    anchored = FALSE;    register int *iptr = match_block.offset_vector + ocount;
4204    if (is_startline(re->code)) startline = TRUE;    register int *iend = iptr - resetcount/2 + 1;
4205      while (--iptr >= iend) *iptr = -1;
4206    }    }
4207    
4208  /* Set up the first character to match, if available. The first_char value is  /* Set up the first character to match, if available. The first_char value is
4209  never set for an anchored regular expression, but the anchoring may be forced  never set for an anchored regular expression, but the anchoring may be forced
4210  at run time, so we have to test for anchoring. The first char may be unset for  at run time, so we have to test for anchoring. The first char may be unset for
4211  an unanchored pattern, of course. If there's no first char and the pattern was  an unanchored pattern, of course. If there's no first char and the pattern was
4212  studied, the may be a bitmap of possible first characters. However, we can  studied, there may be a bitmap of possible first characters. */
 use this only if the caseless state of the studying was correct. */  
4213    
4214  if (!anchored)  if (!anchored)
4215    {    {
4216    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->options & PCRE_FIRSTSET) != 0)
4217      {      {
4218      first_char = re->first_char;      first_char = re->first_char;
4219      if (match_block.caseless) first_char = pcre_lcc[first_char];      if ((ims & PCRE_CASELESS) != 0) first_char = match_block.lcc[first_char];
4220      }      }
4221    else    else
4222      if (!startline && extra != NULL &&      if (!startline && extra != NULL &&
4223        (extra->options & PCRE_STUDY_MAPPED) != 0 &&        (extra->options & PCRE_STUDY_MAPPED) != 0)
       ((extra->options & PCRE_STUDY_CASELESS) != 0) == match_block.caseless)  
4224          start_bits = extra->start_bits;          start_bits = extra->start_bits;
4225    }    }
4226    
# Line 3457  do Line 4240  do
4240    
4241    if (first_char >= 0)    if (first_char >= 0)
4242      {      {
4243      if (match_block.caseless)      if ((ims & PCRE_CASELESS) != 0)
4244        while (start_match < end_subject && pcre_lcc[*start_match] != first_char)        while (start_match < end_subject &&
4245                 match_block.lcc[*start_match] != first_char)
4246          start_match++;          start_match++;
4247      else      else
4248        while (start_match < end_subject && *start_match != first_char)        while (start_match < end_subject && *start_match != first_char)
# Line 3498  do Line 4282  do
4282    there were too many extractions, set the return code to zero. In the case    there were too many extractions, set the return code to zero. In the case
4283    where we had to get some local store to hold offsets for backreferences, copy    where we had to get some local store to hold offsets for backreferences, copy
4284    those back references that we can. In this case there need not be overflow    those back references that we can. In this case there need not be overflow
4285    if certain parts of the pattern were not used.    if certain parts of the pattern were not used. */
   
   Before starting the match, we have to set up a longjmp() target to enable  
   the "cut" operation to fail a match completely without backtracking. This  
   is done in a separate function to avoid compiler warnings. We need not do  
   it unless PCRE_EXTRA is set, since only in that case is the "cut" operation  
   enabled. */  
4286    
4287    if ((re->options & PCRE_EXTRA) != 0)    if (!match(start_match, re->code, 2, &match_block, ims, FALSE, start_match))
4288      {      continue;
     if (!match_with_setjmp(start_match, re->code, 2, &match_block))  
       continue;  
     }  
   else if (!match(start_match, re->code, 2, &match_block)) continue;  
4289    
4290    /* Copy the offset information from temporary store if necessary */    /* Copy the offset information from temporary store if necessary */
4291    
# Line 3541  do Line 4315  do
4315    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
4316    return rc;    return rc;
4317    }    }
4318    
4319    /* This "while" is the end of the "do" above */
4320    
4321  while (!anchored &&  while (!anchored &&
4322         match_block.errorcode == PCRE_ERROR_NOMATCH &&         match_block.errorcode == PCRE_ERROR_NOMATCH &&
4323         start_match++ < end_subject);         start_match++ < end_subject);

Legend:
Removed from v.11  
changed lines
  Added in v.33

  ViewVC Help
Powered by ViewVC 1.1.5