/[pcre]/code/trunk/pcre.c
ViewVC logotype

Diff of /code/trunk/pcre.c

Parent Directory | Revision Log | View Patch

revision 5 by nigel, Sat Feb 24 21:38:05 2007 UTC revision 29 by nigel, Sat Feb 24 21:38:53 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1997 University of Cambridge             Copyright (c) 1997-1999 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 25  restrictions: Line 25  restrictions:
25    
26  3. Altered versions must be plainly marked as such, and must not be  3. Altered versions must be plainly marked as such, and must not be
27     misrepresented as being the original software.     misrepresented as being the original software.
28    
29    4. If PCRE is embedded in any software that is released under the GNU
30       General Public Licence (GPL), then the terms of that licence shall
31       supersede any condition above with which it is incompatible.
32  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
33  */  */
34    
# Line 33  restrictions: Line 37  restrictions:
37    
38  /* #define DEBUG */  /* #define DEBUG */
39    
40    /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
41    inline, and there are *still* stupid compilers about that don't like indented
42    pre-processor statements. I suppose it's only been 10 years... */
43    
44    #ifdef DEBUG
45    #define DPRINTF(p) printf p
46    #else
47    #define DPRINTF(p) /*nothing*/
48    #endif
49    
50  /* Include the internals header, which itself includes Standard C headers plus  /* Include the internals header, which itself includes Standard C headers plus
51  the external pcre header. */  the external pcre header. */
# Line 40  the external pcre header. */ Line 53  the external pcre header. */
53  #include "internal.h"  #include "internal.h"
54    
55    
56    /* Allow compilation as C++ source code, should anybody want to do that. */
57    
58    #ifdef __cplusplus
59    #define class pcre_class
60    #endif
61    
62    
63    /* Number of items on the nested bracket stacks at compile time. This should
64    not be set greater than 200. */
65    
66    #define BRASTACK_SIZE 200
67    
68    
69  /* Min and max values for the common repeats; for the maxima, 0 => infinity */  /* Min and max values for the common repeats; for the maxima, 0 => infinity */
70    
71  static char rep_min[] = { 0, 0, 1, 1, 0, 0 };  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
72  static char rep_max[] = { 0, 0, 0, 0, 1, 1 };  static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
73    
74  /* Text forms of OP_ values and things, for debugging */  /* Text forms of OP_ values and things, for debugging (not all used) */
75    
76  #ifdef DEBUG  #ifdef DEBUG
77  static char *OP_names[] = { "End", "\\A", "\\B", "\\b", "\\D", "\\d",  static const char *OP_names[] = {
78    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",    "End", "\\A", "\\B", "\\b", "\\D", "\\d",
79    "not",    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
80      "Opt", "^", "$", "Any", "chars", "not",
81    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
82    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
83    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
84    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
85    "class", "Ref",    "class", "Ref",
86    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
87      "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
88    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
89  };  };
90  #endif  #endif
# Line 66  are simple data values; negative values Line 94  are simple data values; negative values
94  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
95  is invalid. */  is invalid. */
96    
97  static short int escapes[] = {  static const short int escapes[] = {
98      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
99      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
100    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */
# Line 76  static short int escapes[] = { Line 104  static short int escapes[] = {
104    '`',      7, -ESC_b,      0, -ESC_d,     27,   '\f',      0,   /* ` - g */    '`',      7, -ESC_b,      0, -ESC_d,     27,   '\f',      0,   /* ` - g */
105      0,      0,      0,      0,      0,      0,   '\n',      0,   /* h - o */      0,      0,      0,      0,      0,      0,   '\n',      0,   /* h - o */
106      0,      0,   '\r', -ESC_s,   '\t',      0,      0, -ESC_w,   /* p - w */      0,      0,   '\r', -ESC_s,   '\t',      0,      0, -ESC_w,   /* p - w */
107      0,      0,      0                                            /* x - z */      0,      0, -ESC_z                                            /* x - z */
108  };  };
109    
110  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
111    
112  static BOOL compile_regex(int, int *,uschar **,uschar **,char **);  static BOOL
113      compile_regex(int, int, int *, uschar **, const uschar **, const char **,
114  /* Structure for passing "static" information around between the functions      BOOL, int, compile_data *);
 doing the matching, so that they are thread-safe. */  
   
 typedef struct match_data {  
   int    errorcode;             /* As it says */  
   int   *offset_vector;         /* Offset vector */  
   int    offset_end;            /* One past the end */  
   BOOL   offset_overflow;       /* Set if too many extractions */  
   BOOL   caseless;              /* Case-independent flag */  
   BOOL   runtime_caseless;      /* Caseless forced at run time */  
   BOOL   multiline;             /* Multiline flag */  
   BOOL   notbol;                /* NOTBOL flag */  
   BOOL   noteol;                /* NOTEOL flag */  
   BOOL   dotall;                /* Dot matches any char */  
   BOOL   endonly;               /* Dollar not before final \n */  
   uschar *start_subject;        /* Start of the subject string */  
   uschar *end_subject;          /* End of the subject string */  
   jmp_buf fail_env;             /* Environment for longjump() break out */  
   uschar *end_match_ptr;        /* Subject position at end match */  
   int     end_offset_top;       /* Highwater mark at end of match */  
 } match_data;  
115    
116    
117    
# Line 123  void  (*pcre_free)(void *) = free; Line 131  void  (*pcre_free)(void *) = free;
131    
132    
133  /*************************************************  /*************************************************
134    *             Default character tables           *
135    *************************************************/
136    
137    /* A default set of character tables is included in the PCRE binary. Its source
138    is built by the maketables auxiliary program, which uses the default C ctypes
139    functions, and put in the file chartables.c. These tables are used by PCRE
140    whenever the caller of pcre_compile() does not provide an alternate set of
141    tables. */
142    
143    #include "chartables.c"
144    
145    
146    
147    /*************************************************
148  *          Return version string                 *  *          Return version string                 *
149  *************************************************/  *************************************************/
150    
151  char *  const char *
152  pcre_version(void)  pcre_version(void)
153  {  {
154  return PCRE_VERSION;  return PCRE_VERSION;
# Line 156  Returns:        number of identifying ex Line 178  Returns:        number of identifying ex
178  int  int
179  pcre_info(const pcre *external_re, int *optptr, int *first_char)  pcre_info(const pcre *external_re, int *optptr, int *first_char)
180  {  {
181  real_pcre *re = (real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
182  if (re == NULL) return PCRE_ERROR_NULL;  if (re == NULL) return PCRE_ERROR_NULL;
183  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
184  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);
# Line 186  Arguments: Line 208  Arguments:
208  Returns:     nothing  Returns:     nothing
209  */  */
210    
211  static pchars(uschar *p, int length, BOOL is_subject, match_data *md)  static void
212    pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
213  {  {
214  int c;  int c;
215  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
# Line 199  while (length-- > 0) Line 222  while (length-- > 0)
222    
223    
224  /*************************************************  /*************************************************
 *         Check subpattern for empty operand     *  
 *************************************************/  
   
 /* This function checks a bracketed subpattern to see if any of the paths  
 through it could match an empty string. This is used to diagnose an error if  
 such a subpattern is followed by a quantifier with an unlimited upper bound.  
   
 Argument:  
   code      points to the opening bracket  
   
 Returns:    TRUE or FALSE  
 */  
   
 static BOOL  
 could_be_empty(uschar *code)  
 {  
 do {  
   uschar *cc = code + 3;  
   
   /* Scan along the opcodes for this branch; as soon as we find something  
   that matches a non-empty string, break out and advance to test the next  
   branch. If we get to the end of the branch, return TRUE for the whole  
   sub-expression. */  
   
   for (;;)  
     {  
     /* Test an embedded subpattern; if it could not be empty, break the  
     loop. Otherwise carry on in the branch. */  
   
     if ((int)(*cc) >= OP_BRA || (int)(*cc) == OP_ONCE)  
       {  
       if (!could_be_empty(cc)) break;  
       do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);  
       cc += 3;  
       }  
   
     else switch (*cc)  
       {  
       /* Reached end of a branch: the subpattern may match the empty string */  
   
       case OP_ALT:  
       case OP_KET:  
       case OP_KETRMAX:  
       case OP_KETRMIN:  
       return TRUE;  
   
       /* Skip over assertive subpatterns */  
   
       case OP_ASSERT:  
       case OP_ASSERT_NOT:  
       do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);  
       cc += 3;  
       break;  
   
       /* Skip over things that don't match chars */  
   
       case OP_SOD:  
       case OP_EOD:  
       case OP_CIRC:  
       case OP_DOLL:  
       case OP_BRAZERO:  
       case OP_BRAMINZERO:  
       case OP_NOT_WORD_BOUNDARY:  
       case OP_WORD_BOUNDARY:  
       cc++;  
       break;  
   
       /* Skip over simple repeats with zero lower bound */  
   
       case OP_STAR:  
       case OP_MINSTAR:  
       case OP_QUERY:  
       case OP_MINQUERY:  
       case OP_NOTSTAR:  
       case OP_NOTMINSTAR:  
       case OP_NOTQUERY:  
       case OP_NOTMINQUERY:  
       case OP_TYPESTAR:  
       case OP_TYPEMINSTAR:  
       case OP_TYPEQUERY:  
       case OP_TYPEMINQUERY:  
       cc += 2;  
       break;  
   
       /* Skip over UPTOs (lower bound is zero) */  
   
       case OP_UPTO:  
       case OP_MINUPTO:  
       case OP_TYPEUPTO:  
       case OP_TYPEMINUPTO:  
       cc += 4;  
       break;  
   
       /* Check a class or a back reference for a zero minimum */  
   
       case OP_CLASS:  
       case OP_REF:  
       cc += (*cc == OP_REF)? 2 : 33;  
   
       switch (*cc)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         cc++;  
         break;  
   
         case OP_CRRANGE:  
         case OP_CRMINRANGE:  
         if ((cc[1] << 8) + cc[2] != 0) goto NEXT_BRANCH;  
         cc += 3;  
         break;  
   
         default:  
         goto NEXT_BRANCH;  
         }  
       break;  
   
       /* Anything else matches at least one character */  
   
       default:  
       goto NEXT_BRANCH;  
       }  
     }  
   
   NEXT_BRANCH:  
   code += (code[1] << 8) + code[2];  
   }  
 while (*code == OP_ALT);  
   
 /* No branches match the empty string */  
   
 return FALSE;  
 }  
   
   
   
 /*************************************************  
225  *            Handle escapes                      *  *            Handle escapes                      *
226  *************************************************/  *************************************************/
227    
# Line 353  Arguments: Line 237  Arguments:
237    bracount   number of previous extracting brackets    bracount   number of previous extracting brackets
238    options    the options bits    options    the options bits
239    isclass    TRUE if inside a character class    isclass    TRUE if inside a character class
240      cd         pointer to char tables block
241    
242  Returns:     zero or positive => a data character  Returns:     zero or positive => a data character
243               negative => a special escape sequence               negative => a special escape sequence
# Line 360  Returns:     zero or positive => a data Line 245  Returns:     zero or positive => a data
245  */  */
246    
247  static int  static int
248  check_escape(uschar **ptrptr, char **errorptr, int bracount, int options,  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
249    BOOL isclass)    int options, BOOL isclass, compile_data *cd)
250  {  {
251  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
252  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */
253  int i;  int i;
254    
# Line 382  else if ((i = escapes[c - '0']) != 0) c Line 267  else if ((i = escapes[c - '0']) != 0) c
267    
268  else  else
269    {    {
270    uschar *oldptr;    const uschar *oldptr;
271    switch (c)    switch (c)
272      {      {
273      /* The handling of escape sequences consisting of a string of digits      /* The handling of escape sequences consisting of a string of digits
# Line 404  else Line 289  else
289        {        {
290        oldptr = ptr;        oldptr = ptr;
291        c -= '0';        c -= '0';
292        while ((pcre_ctypes[ptr[1]] & ctype_digit) != 0)        while ((cd->ctypes[ptr[1]] & ctype_digit) != 0)
293          c = c * 10 + *(++ptr) - '0';          c = c * 10 + *(++ptr) - '0';
294        if (c < 10 || c <= bracount)        if (c < 10 || c <= bracount)
295          {          {
# Line 430  else Line 315  else
315    
316      case '0':      case '0':
317      c -= '0';      c -= '0';
318      while(i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_digit) != 0 &&      while(i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 &&
319        ptr[1] != '8' && ptr[1] != '9')        ptr[1] != '8' && ptr[1] != '9')
320          c = c * 8 + *(++ptr) - '0';          c = c * 8 + *(++ptr) - '0';
321      break;      break;
# Line 439  else Line 324  else
324    
325      case 'x':      case 'x':
326      c = 0;      c = 0;
327      while (i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_xdigit) != 0)      while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0)
328        {        {
329        ptr++;        ptr++;
330        c = c * 16 + pcre_lcc[*ptr] -        c = c * 16 + cd->lcc[*ptr] -
331          (((pcre_ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');          (((cd->ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');
332        }        }
333      break;      break;
334    
# Line 457  else Line 342  else
342    
343      /* A letter is upper-cased; then the 0x40 bit is flipped */      /* A letter is upper-cased; then the 0x40 bit is flipped */
344    
345      if (c >= 'a' && c <= 'z') c = pcre_fcc[c];      if (c >= 'a' && c <= 'z') c = cd->fcc[c];
346      c ^= 0x40;      c ^= 0x40;
347      break;      break;
348    
349      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
350      other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,      other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,
351      for Perl compatibility, it is a literal. */      for Perl compatibility, it is a literal. This code looks a bit odd, but
352        there used to be some cases other than the default, and there may be again
353        in future, so I haven't "optimized" it. */
354    
355      default:      default:
356      if ((options & PCRE_EXTRA) != 0) switch(c)      if ((options & PCRE_EXTRA) != 0) switch(c)
357        {        {
       case 'X':  
       c = -ESC_X;      /* This could be a lookup if it ever got into Perl */  
       break;  
   
358        default:        default:
359        *errorptr = ERR3;        *errorptr = ERR3;
360        break;        break;
# Line 497  where the ddds are digits. Line 380  where the ddds are digits.
380    
381  Arguments:  Arguments:
382    p         pointer to the first char after '{'    p         pointer to the first char after '{'
383      cd        pointer to char tables block
384    
385  Returns:    TRUE or FALSE  Returns:    TRUE or FALSE
386  */  */
387    
388  static BOOL  static BOOL
389  is_counted_repeat(uschar *p)  is_counted_repeat(const uschar *p, compile_data *cd)
390  {  {
391  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
392  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
393  if (*p == '}') return TRUE;  if (*p == '}') return TRUE;
394    
395  if (*p++ != ',') return FALSE;  if (*p++ != ',') return FALSE;
396  if (*p == '}') return TRUE;  if (*p == '}') return TRUE;
397    
398  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
399  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
400  return (*p == '}');  return (*p == '}');
401  }  }
402    
# Line 532  Arguments: Line 416  Arguments:
416    maxp       pointer to int for max    maxp       pointer to int for max
417               returned as -1 if no max               returned as -1 if no max
418    errorptr   points to pointer to error message    errorptr   points to pointer to error message
419      cd         pointer to character tables clock
420    
421  Returns:     pointer to '}' on success;  Returns:     pointer to '}' on success;
422               current ptr on error, with errorptr set               current ptr on error, with errorptr set
423  */  */
424    
425  static uschar *  static const uschar *
426  read_repeat_counts(uschar *p, int *minp, int *maxp, char **errorptr)  read_repeat_counts(const uschar *p, int *minp, int *maxp,
427      const char **errorptr, compile_data *cd)
428  {  {
429  int min = 0;  int min = 0;
430  int max = -1;  int max = -1;
431    
432  while ((pcre_ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';  while ((cd->ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';
433    
434  if (*p == '}') max = min; else  if (*p == '}') max = min; else
435    {    {
436    if (*(++p) != '}')    if (*(++p) != '}')
437      {      {
438      max = 0;      max = 0;
439      while((pcre_ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';      while((cd->ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';
440      if (max < min)      if (max < min)
441        {        {
442        *errorptr = ERR4;        *errorptr = ERR4;
# Line 575  return p; Line 461  return p;
461    
462    
463  /*************************************************  /*************************************************
464    *        Find the fixed length of a pattern      *
465    *************************************************/
466    
467    /* Scan a pattern and compute the fixed length of subject that will match it,
468    if the length is fixed. This is needed for dealing with backward assertions.
469    
470    Arguments:
471      code     points to the start of the pattern (the bracket)
472    
473    Returns:   the fixed length, or -1 if there is no fixed length
474    */
475    
476    static int
477    find_fixedlength(uschar *code)
478    {
479    int length = -1;
480    
481    register int branchlength = 0;
482    register uschar *cc = code + 3;
483    
484    /* Scan along the opcodes for this branch. If we get to the end of the
485    branch, check the length against that of the other branches. */
486    
487    for (;;)
488      {
489      int d;
490      register int op = *cc;
491      if (op >= OP_BRA) op = OP_BRA;
492    
493      switch (op)
494        {
495        case OP_BRA:
496        case OP_ONCE:
497        case OP_COND:
498        d = find_fixedlength(cc);
499        if (d < 0) return -1;
500        branchlength += d;
501        do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
502        cc += 3;
503        break;
504    
505        /* Reached end of a branch; if it's a ket it is the end of a nested
506        call. If it's ALT it is an alternation in a nested call. If it is
507        END it's the end of the outer call. All can be handled by the same code. */
508    
509        case OP_ALT:
510        case OP_KET:
511        case OP_KETRMAX:
512        case OP_KETRMIN:
513        case OP_END:
514        if (length < 0) length = branchlength;
515          else if (length != branchlength) return -1;
516        if (*cc != OP_ALT) return length;
517        cc += 3;
518        branchlength = 0;
519        break;
520    
521        /* Skip over assertive subpatterns */
522    
523        case OP_ASSERT:
524        case OP_ASSERT_NOT:
525        case OP_ASSERTBACK:
526        case OP_ASSERTBACK_NOT:
527        do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
528        cc += 3;
529        break;
530    
531        /* Skip over things that don't match chars */
532    
533        case OP_REVERSE:
534        cc++;
535    
536        case OP_CREF:
537        case OP_OPT:
538        cc++;
539        /* Fall through */
540    
541        case OP_SOD:
542        case OP_EOD:
543        case OP_EODN:
544        case OP_CIRC:
545        case OP_DOLL:
546        case OP_NOT_WORD_BOUNDARY:
547        case OP_WORD_BOUNDARY:
548        cc++;
549        break;
550    
551        /* Handle char strings */
552    
553        case OP_CHARS:
554        branchlength += *(++cc);
555        cc += *cc + 1;
556        break;
557    
558        /* Handle exact repetitions */
559    
560        case OP_EXACT:
561        case OP_TYPEEXACT:
562        branchlength += (cc[1] << 8) + cc[2];
563        cc += 4;
564        break;
565    
566        /* Handle single-char matchers */
567    
568        case OP_NOT_DIGIT:
569        case OP_DIGIT:
570        case OP_NOT_WHITESPACE:
571        case OP_WHITESPACE:
572        case OP_NOT_WORDCHAR:
573        case OP_WORDCHAR:
574        case OP_ANY:
575        branchlength++;
576        cc++;
577        break;
578    
579    
580        /* Check a class for variable quantification */
581    
582        case OP_CLASS:
583        cc += (*cc == OP_REF)? 2 : 33;
584    
585        switch (*cc)
586          {
587          case OP_CRSTAR:
588          case OP_CRMINSTAR:
589          case OP_CRQUERY:
590          case OP_CRMINQUERY:
591          return -1;
592    
593          case OP_CRRANGE:
594          case OP_CRMINRANGE:
595          if ((cc[1] << 8) + cc[2] != (cc[3] << 8) + cc[4]) return -1;
596          branchlength += (cc[1] << 8) + cc[2];
597          cc += 5;
598          break;
599    
600          default:
601          branchlength++;
602          }
603        break;
604    
605        /* Anything else is variable length */
606    
607        default:
608        return -1;
609        }
610      }
611    /* Control never gets here */
612    }
613    
614    
615    
616    
617    /*************************************************
618  *           Compile one branch                   *  *           Compile one branch                   *
619  *************************************************/  *************************************************/
620    
621  /* Scan the pattern, compiling it into the code vector.  /* Scan the pattern, compiling it into the code vector.
622    
623  Arguments:  Arguments:
624    options    the option bits    options      the option bits
625    bracket    points to number of brackets used    brackets     points to number of brackets used
626    code       points to the pointer to the current code point    code         points to the pointer to the current code point
627    ptrptr     points to the current pattern pointer    ptrptr       points to the current pattern pointer
628    errorptr   points to pointer to error message    errorptr     points to pointer to error message
629      optchanged   set to the value of the last OP_OPT item compiled
630      cd           contains pointers to tables
631    
632  Returns:     TRUE on success  Returns:       TRUE on success
633               FALSE, with *errorptr set on error                 FALSE, with *errorptr set on error
634  */  */
635    
636  static BOOL  static BOOL
637  compile_branch(int options, int *brackets, uschar **codeptr, uschar **ptrptr,  compile_branch(int options, int *brackets, uschar **codeptr,
638    char **errorptr)    const uschar **ptrptr, const char **errorptr, int *optchanged,
639      compile_data *cd)
640  {  {
641  int repeat_type, op_type;  int repeat_type, op_type;
642  int repeat_min, repeat_max;  int repeat_min, repeat_max;
643  int bravalue, length;  int bravalue, length;
644    int greedy_default, greedy_non_default;
645  register int c;  register int c;
646  register uschar *code = *codeptr;  register uschar *code = *codeptr;
647  uschar *ptr = *ptrptr;  uschar *tempcode;
648    const uschar *ptr = *ptrptr;
649    const uschar *tempptr;
650  uschar *previous = NULL;  uschar *previous = NULL;
 uschar *oldptr;  
651  uschar class[32];  uschar class[32];
652    
653    /* Set up the default and non-default settings for greediness */
654    
655    greedy_default = ((options & PCRE_UNGREEDY) != 0);
656    greedy_non_default = greedy_default ^ 1;
657    
658  /* Switch on next character until the end of the branch */  /* Switch on next character until the end of the branch */
659    
660  for (;; ptr++)  for (;; ptr++)
661    {    {
662    BOOL negate_class;    BOOL negate_class;
663    int  class_charcount;    int class_charcount;
664    int  class_lastchar;    int class_lastchar;
665      int newoptions;
666      int condref;
667    
668    c = *ptr;    c = *ptr;
669    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
670      {      {
671      if ((pcre_ctypes[c] & ctype_space) != 0) continue;      if ((cd->ctypes[c] & ctype_space) != 0) continue;
672      if (c == '#')      if (c == '#')
673        {        {
674        while ((c = *(++ptr)) != 0 && c != '\n');        while ((c = *(++ptr)) != 0 && c != '\n');
# Line 661  for (;; ptr++) Line 713  for (;; ptr++)
713      previous = code;      previous = code;
714      *code++ = OP_CLASS;      *code++ = OP_CLASS;
715    
716      /* If the first character is '^', set the negation flag */      /* If the first character is '^', set the negation flag and skip it. */
717    
718      if ((c = *(++ptr)) == '^')      if ((c = *(++ptr)) == '^')
719        {        {
# Line 697  for (;; ptr++) Line 749  for (;; ptr++)
749        /* Backslash may introduce a single character, or it may introduce one        /* Backslash may introduce a single character, or it may introduce one
750        of the specials, which just set a flag. Escaped items are checked for        of the specials, which just set a flag. Escaped items are checked for
751        validity in the pre-compiling pass. The sequence \b is a special case.        validity in the pre-compiling pass. The sequence \b is a special case.
752        Inside a class (and only there) it is treated as backslash. Elsewhere        Inside a class (and only there) it is treated as backspace. Elsewhere
753        it marks a word boundary. Other escapes have preset maps ready to        it marks a word boundary. Other escapes have preset maps ready to
754        or into the one we are building. We assume they have more than one        or into the one we are building. We assume they have more than one
755        character in them, so set class_charcount bigger than one. */        character in them, so set class_charcount bigger than one. */
756    
757        if (c == '\\')        if (c == '\\')
758          {          {
759          c = check_escape(&ptr, errorptr, *brackets, options, TRUE);          c = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
760          if (-c == ESC_b) c = '\b';          if (-c == ESC_b) c = '\b';
761          else if (c < 0)          else if (c < 0)
762            {            {
763              register const uschar *cbits = cd->cbits;
764            class_charcount = 10;            class_charcount = 10;
765            switch (-c)            switch (-c)
766              {              {
767              case ESC_d:              case ESC_d:
768              for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_digit];              for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_digit];
769              continue;              continue;
770    
771              case ESC_D:              case ESC_D:
772              for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_digit];              for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_digit];
773              continue;              continue;
774    
775              case ESC_w:              case ESC_w:
776              for (c = 0; c < 32; c++)              for (c = 0; c < 32; c++)
777                class[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]);                class[c] |= (cbits[c+cbit_digit] | cbits[c+cbit_word]);
778              continue;              continue;
779    
780              case ESC_W:              case ESC_W:
781              for (c = 0; c < 32; c++)              for (c = 0; c < 32; c++)
782                class[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]);                class[c] |= ~(cbits[c+cbit_digit] | cbits[c+cbit_word]);
783              continue;              continue;
784    
785              case ESC_s:              case ESC_s:
786              for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_space];              for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_space];
787              continue;              continue;
788    
789              case ESC_S:              case ESC_S:
790              for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_space];              for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_space];
791              continue;              continue;
792    
793              default:              default:
# Line 766  for (;; ptr++) Line 819  for (;; ptr++)
819    
820          if (d == '\\')          if (d == '\\')
821            {            {
822            d = check_escape(&ptr, errorptr, *brackets, options, TRUE);            d = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
823            if (d < 0)            if (d < 0)
824              {              {
825              if (d == -ESC_b) d = '\b'; else              if (d == -ESC_b) d = '\b'; else
# Line 788  for (;; ptr++) Line 841  for (;; ptr++)
841            class[c/8] |= (1 << (c&7));            class[c/8] |= (1 << (c&7));
842            if ((options & PCRE_CASELESS) != 0)            if ((options & PCRE_CASELESS) != 0)
843              {              {
844              int uc = pcre_fcc[c];           /* flip case */              int uc = cd->fcc[c];           /* flip case */
845              class[uc/8] |= (1 << (uc&7));              class[uc/8] |= (1 << (uc&7));
846              }              }
847            class_charcount++;                /* in case a one-char range */            class_charcount++;                /* in case a one-char range */
# Line 803  for (;; ptr++) Line 856  for (;; ptr++)
856        class [c/8] |= (1 << (c&7));        class [c/8] |= (1 << (c&7));
857        if ((options & PCRE_CASELESS) != 0)        if ((options & PCRE_CASELESS) != 0)
858          {          {
859          c = pcre_fcc[c];   /* flip case */          c = cd->fcc[c];   /* flip case */
860          class[c/8] |= (1 << (c&7));          class[c/8] |= (1 << (c&7));
861          }          }
862        class_charcount++;        class_charcount++;
# Line 850  for (;; ptr++) Line 903  for (;; ptr++)
903      /* Various kinds of repeat */      /* Various kinds of repeat */
904    
905      case '{':      case '{':
906      if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;      if (!is_counted_repeat(ptr+1, cd)) goto NORMAL_CHAR;
907      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr);      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr, cd);
908      if (*errorptr != NULL) goto FAILED;      if (*errorptr != NULL) goto FAILED;
909      goto REPEAT;      goto REPEAT;
910    
# Line 876  for (;; ptr++) Line 929  for (;; ptr++)
929        goto FAILED;        goto FAILED;
930        }        }
931    
932      /* If the next character is '?' this is a minimizing repeat. Advance to the      /* If the next character is '?' this is a minimizing repeat, by default,
933        but if PCRE_UNGREEDY is set, it works the other way round. Advance to the
934      next character. */      next character. */
935    
936      if (ptr[1] == '?') { repeat_type = 1; ptr++; } else repeat_type = 0;      if (ptr[1] == '?')
937          { repeat_type = greedy_non_default; ptr++; }
938        else repeat_type = greedy_default;
939    
940      /* If the maximum is zero then the minimum must also be zero; Perl allows      /* If the maximum is zero then the minimum must also be zero; Perl allows
941      this case, so we do too - by simply omitting the item altogether. */      this case, so we do too - by simply omitting the item altogether. */
# Line 924  for (;; ptr++) Line 980  for (;; ptr++)
980      create a suitable repeat item. The code is shared with single-character      create a suitable repeat item. The code is shared with single-character
981      repeats by adding a suitable offset into repeat_type. */      repeats by adding a suitable offset into repeat_type. */
982    
983      else if ((int)*previous < OP_EOD || *previous == OP_ANY)      else if ((int)*previous < OP_EODN || *previous == OP_ANY)
984        {        {
985        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */
986        c = *previous;        c = *previous;
# Line 968  for (;; ptr++) Line 1024  for (;; ptr++)
1024          /* If the minimum is 1 and the previous item was a character string,          /* If the minimum is 1 and the previous item was a character string,
1025          we either have to put back the item that got cancelled if the string          we either have to put back the item that got cancelled if the string
1026          length was 1, or add the character back onto the end of a longer          length was 1, or add the character back onto the end of a longer
1027          string. For a character type nothing need be done; it will just get put          string. For a character type nothing need be done; it will just get
1028          back naturally. */          put back naturally. Note that the final character is always going to
1029            get added below. */
1030    
1031          else if (*previous == OP_CHARS)          else if (*previous == OP_CHARS)
1032            {            {
1033            if (code == previous) code += 2; else previous[1]++;            if (code == previous) code += 2; else previous[1]++;
1034            }            }
1035    
1036          /* Insert an UPTO if the max is greater than the min. */          /*  For a single negated character we also have to put back the
1037            item that got cancelled. */
1038    
1039            else if (*previous == OP_NOT) code++;
1040    
1041            /* If the maximum is unlimited, insert an OP_STAR. */
1042    
1043            if (repeat_max < 0)
1044              {
1045              *code++ = c;
1046              *code++ = OP_STAR + repeat_type;
1047              }
1048    
1049            /* Else insert an UPTO if the max is greater than the min. */
1050    
1051          if (repeat_max != repeat_min)          else if (repeat_max != repeat_min)
1052            {            {
1053            *code++ = c;            *code++ = c;
1054            repeat_max -= repeat_min;            repeat_max -= repeat_min;
# Line 1016  for (;; ptr++) Line 1086  for (;; ptr++)
1086        }        }
1087    
1088      /* If previous was a bracket group, we may have to replicate it in certain      /* If previous was a bracket group, we may have to replicate it in certain
1089      cases. If the maximum repeat count is unlimited, check that the bracket      cases. */
     group cannot match the empty string, and diagnose an error if it can. */  
1090    
1091      else if ((int)*previous >= OP_BRA)      else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE ||
1092                 (int)*previous == OP_COND)
1093        {        {
1094        int i;        int i, ketoffset = 0;
1095        int length = code - previous;        int len = code - previous;
1096    
1097          /* If the maximum repeat count is unlimited, find the end of the bracket
1098          by scanning through from the start, and compute the offset back to it
1099          from the current code pointer. There may be an OP_OPT setting following
1100          the final KET, so we can't find the end just by going back from the code
1101          pointer. */
1102    
1103        if (repeat_max == -1 && could_be_empty(previous))        if (repeat_max == -1)
1104          {          {
1105          *errorptr = ERR10;          register uschar *ket = previous;
1106          goto FAILED;          do ket += (ket[1] << 8) + ket[2]; while (*ket != OP_KET);
1107            ketoffset = code - ket;
1108          }          }
1109    
1110        /* If the minimum is greater than zero, and the maximum is unlimited or        /* If the minimum is greater than zero, and the maximum is unlimited or
# Line 1039  for (;; ptr++) Line 1116  for (;; ptr++)
1116          {          {
1117          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1118            {            {
1119            memcpy(code, previous, length);            memcpy(code, previous, len);
1120            code += length;            code += len;
1121            }            }
1122          }          }
1123    
# Line 1052  for (;; ptr++) Line 1129  for (;; ptr++)
1129          {          {
1130          if (repeat_min == 0)          if (repeat_min == 0)
1131            {            {
1132            memmove(previous+1, previous, length);            memmove(previous+1, previous, len);
1133            code++;            code++;
1134            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
1135            }            }
1136    
1137          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1138            {            {
1139            memcpy(code, previous, length);            memcpy(code, previous, len);
1140            code += length;            code += len;
1141            }            }
1142    
1143          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)
1144            {            {
1145            *code++ = OP_BRAZERO + repeat_type;            *code++ = OP_BRAZERO + repeat_type;
1146            memcpy(code, previous, length);            memcpy(code, previous, len);
1147            code += length;            code += len;
1148            }            }
1149          }          }
1150    
1151        /* If the maximum is unlimited, set a repeater in the final copy. */        /* If the maximum is unlimited, set a repeater in the final copy. We
1152          can't just offset backwards from the current code point, because we
1153          don't know if there's been an options resetting after the ket. The
1154          correct offset was computed above. */
1155    
1156        if (repeat_max == -1) code[-3] = OP_KETRMAX + repeat_type;        if (repeat_max == -1) code[-ketoffset] = OP_KETRMAX + repeat_type;
1157        }        }
1158    
1159      /* Else there's some kind of shambles */      /* Else there's some kind of shambles */
# Line 1090  for (;; ptr++) Line 1170  for (;; ptr++)
1170      break;      break;
1171    
1172    
1173      /* Start of nested bracket sub-expression, or comment or lookahead.      /* Start of nested bracket sub-expression, or comment or lookahead or
1174      First deal with special things that can come after a bracket; all are      lookbehind or option setting or condition. First deal with special things
1175      introduced by ?, and the appearance of any of them means that this is not a      that can come after a bracket; all are introduced by ?, and the appearance
1176      referencing group. They were checked for validity in the first pass over      of any of them means that this is not a referencing group. They were
1177      the string, so we don't have to check for syntax errors here.  */      checked for validity in the first pass over the string, so we don't have to
1178        check for syntax errors here.  */
1179    
1180      case '(':      case '(':
1181      previous = code;              /* Only real brackets can be repeated */      newoptions = options;
1182        condref = -1;
1183    
1184      if (*(++ptr) == '?')      if (*(++ptr) == '?')
1185        {        {
1186        bravalue = OP_BRA;        int set, unset;
1187          int *optset;
1188    
1189        switch (*(++ptr))        switch (*(++ptr))
1190          {          {
1191          case '#':          case '#':                 /* Comment; skip to ket */
         case 'i':  
         case 'm':  
         case 's':  
         case 'x':  
1192          ptr++;          ptr++;
1193          while (*ptr != ')') ptr++;          while (*ptr != ')') ptr++;
         previous = NULL;  
1194          continue;          continue;
1195    
1196          case ':':                 /* Non-extracting bracket */          case ':':                 /* Non-extracting bracket */
1197            bravalue = OP_BRA;
1198          ptr++;          ptr++;
1199          break;          break;
1200    
1201          case '=':                 /* Assertions can't be repeated */          case '(':
1202            bravalue = OP_COND;       /* Conditional group */
1203            if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0)
1204              {
1205              condref = *ptr - '0';
1206              while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';
1207              ptr++;
1208              }
1209            else ptr--;
1210            break;
1211    
1212            case '=':                 /* Positive lookahead */
1213          bravalue = OP_ASSERT;          bravalue = OP_ASSERT;
1214          ptr++;          ptr++;
         previous = NULL;  
1215          break;          break;
1216    
1217          case '!':          case '!':                 /* Negative lookahead */
1218          bravalue = OP_ASSERT_NOT;          bravalue = OP_ASSERT_NOT;
1219          ptr++;          ptr++;
         previous = NULL;  
1220          break;          break;
1221    
1222          case '>':                         /* "Match once" brackets */          case '<':                 /* Lookbehinds */
1223          if ((options & PCRE_EXTRA) != 0)  /* Not yet standard */          switch (*(++ptr))
1224            {            {
1225            bravalue = OP_ONCE;            case '=':               /* Positive lookbehind */
1226              bravalue = OP_ASSERTBACK;
1227            ptr++;            ptr++;
           previous = NULL;  
1228            break;            break;
1229    
1230              case '!':               /* Negative lookbehind */
1231              bravalue = OP_ASSERTBACK_NOT;
1232              ptr++;
1233              break;
1234    
1235              default:                /* Syntax error */
1236              *errorptr = ERR24;
1237              goto FAILED;
1238            }            }
1239          /* Else fall through */          break;
1240    
1241          default:          case '>':                 /* One-time brackets */
1242          *errorptr = ERR12;          bravalue = OP_ONCE;
1243          goto FAILED;          ptr++;
1244            break;
1245    
1246            default:                  /* Option setting */
1247            set = unset = 0;
1248            optset = &set;
1249    
1250            while (*ptr != ')' && *ptr != ':')
1251              {
1252              switch (*ptr++)
1253                {
1254                case '-': optset = &unset; break;
1255    
1256                case 'i': *optset |= PCRE_CASELESS; break;
1257                case 'm': *optset |= PCRE_MULTILINE; break;
1258                case 's': *optset |= PCRE_DOTALL; break;
1259                case 'x': *optset |= PCRE_EXTENDED; break;
1260                case 'U': *optset |= PCRE_UNGREEDY; break;
1261                case 'X': *optset |= PCRE_EXTRA; break;
1262    
1263                default:
1264                *errorptr = ERR12;
1265                goto FAILED;
1266                }
1267              }
1268    
1269            /* Set up the changed option bits, but don't change anything yet. */
1270    
1271            newoptions = (options | set) & (~unset);
1272    
1273            /* If the options ended with ')' this is not the start of a nested
1274            group with option changes, so the options change at this level. At top
1275            level there is nothing else to be done (the options will in fact have
1276            been set from the start of compiling as a result of the first pass) but
1277            at an inner level we must compile code to change the ims options if
1278            necessary, and pass the new setting back so that it can be put at the
1279            start of any following branches, and when this group ends, a resetting
1280            item can be compiled. */
1281    
1282            if (*ptr == ')')
1283              {
1284              if ((options & PCRE_INGROUP) != 0 &&
1285                  (options & PCRE_IMS) != (newoptions & PCRE_IMS))
1286                {
1287                *code++ = OP_OPT;
1288                *code++ = *optchanged = newoptions & PCRE_IMS;
1289                }
1290              options = newoptions;  /* Change options at this level */
1291              previous = NULL;       /* This item can't be repeated */
1292              continue;              /* It is complete */
1293              }
1294    
1295            /* If the options ended with ':' we are heading into a nested group
1296            with possible change of options. Such groups are non-capturing and are
1297            not assertions of any kind. All we need to do is skip over the ':';
1298            the newoptions value is handled below. */
1299    
1300            bravalue = OP_BRA;
1301            ptr++;
1302          }          }
1303        }        }
1304    
1305      /* Else we have a referencing group */      /* Else we have a referencing group; adjust the opcode. */
1306    
1307      else      else
1308        {        {
# Line 1158  for (;; ptr++) Line 1314  for (;; ptr++)
1314        bravalue = OP_BRA + *brackets;        bravalue = OP_BRA + *brackets;
1315        }        }
1316    
1317      /* Process nested bracketed re; at end pointer is on the bracket. We copy      /* Process nested bracketed re. Assertions may not be repeated, but other
1318      code into a non-register variable in order to be able to pass its address      kinds can be. We copy code into a non-register variable in order to be able
1319      because some compilers complain otherwise. */      to pass its address because some compilers complain otherwise. Pass in a
1320        new setting for the ims options if they have changed. */
1321    
1322        previous = (bravalue >= OP_ONCE)? code : NULL;
1323      *code = bravalue;      *code = bravalue;
1324        tempcode = code;
1325    
1326        if (!compile_regex(
1327             options | PCRE_INGROUP,       /* Set for all nested groups */
1328             ((options & PCRE_IMS) != (newoptions & PCRE_IMS))?
1329               newoptions & PCRE_IMS : -1, /* Pass ims options if changed */
1330             brackets,                     /* Bracket level */
1331             &tempcode,                    /* Where to put code (updated) */
1332             &ptr,                         /* Input pointer (updated) */
1333             errorptr,                     /* Where to put an error message */
1334             (bravalue == OP_ASSERTBACK ||
1335              bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
1336             condref,                      /* Condition reference number */
1337             cd))                          /* Tables block */
1338          goto FAILED;
1339    
1340        /* At the end of compiling, code is still pointing to the start of the
1341        group, while tempcode has been updated to point past the end of the group
1342        and any option resetting that may follow it. The pattern pointer (ptr)
1343        is on the bracket. */
1344    
1345        /* If this is a conditional bracket, check that there are no more than
1346        two branches in the group. */
1347    
1348        if (bravalue == OP_COND)
1349        {        {
1350        uschar *mcode = code;        int branchcount = 0;
1351        if (!compile_regex(options, brackets, &mcode, &ptr, errorptr))        uschar *tc = code;
1352    
1353          do {
1354             branchcount++;
1355             tc += (tc[1] << 8) | tc[2];
1356             }
1357          while (*tc != OP_KET);
1358    
1359          if (branchcount > 2)
1360            {
1361            *errorptr = ERR27;
1362          goto FAILED;          goto FAILED;
1363        code = mcode;          }
1364        }        }
1365    
1366        /* Now update the main code pointer to the end of the group. */
1367    
1368        code = tempcode;
1369    
1370        /* Error if hit end of pattern */
1371    
1372      if (*ptr != ')')      if (*ptr != ')')
1373        {        {
1374        *errorptr = ERR14;        *errorptr = ERR14;
# Line 1182  for (;; ptr++) Line 1381  for (;; ptr++)
1381      for validity in the pre-compiling pass. */      for validity in the pre-compiling pass. */
1382    
1383      case '\\':      case '\\':
1384      oldptr = ptr;      tempptr = ptr;
1385      c = check_escape(&ptr, errorptr, *brackets, options, FALSE);      c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1386    
1387      /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values      /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values
1388      are arranged to be the negation of the corresponding OP_values. For the      are arranged to be the negation of the corresponding OP_values. For the
# Line 1196  for (;; ptr++) Line 1395  for (;; ptr++)
1395        {        {
1396        if (-c >= ESC_REF)        if (-c >= ESC_REF)
1397          {          {
         int refnum = -c - ESC_REF;  
         if (*brackets < refnum)  
           {  
           *errorptr = ERR15;  
           goto FAILED;  
           }  
1398          previous = code;          previous = code;
1399          *code++ = OP_REF;          *code++ = OP_REF;
1400          *code++ = refnum;          *code++ = -c - ESC_REF;
1401          }          }
1402        else        else
1403          {          {
1404          previous = (-c > ESC_b && -c < ESC_X)? code : NULL;          previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
1405          *code++ = -c;          *code++ = -c;
1406          }          }
1407        continue;        continue;
1408        }        }
1409    
1410      /* Reset and fall through */      /* Data character: reset and fall through */
1411    
1412      ptr = oldptr;      ptr = tempptr;
1413      c = '\\';      c = '\\';
1414    
1415      /* Handle a run of data characters until a metacharacter is encountered.      /* Handle a run of data characters until a metacharacter is encountered.
# Line 1234  for (;; ptr++) Line 1427  for (;; ptr++)
1427        {        {
1428        if ((options & PCRE_EXTENDED) != 0)        if ((options & PCRE_EXTENDED) != 0)
1429          {          {
1430          if ((pcre_ctypes[c] & ctype_space) != 0) continue;          if ((cd->ctypes[c] & ctype_space) != 0) continue;
1431          if (c == '#')          if (c == '#')
1432            {            {
1433            while ((c = *(++ptr)) != 0 && c != '\n');            while ((c = *(++ptr)) != 0 && c != '\n');
# Line 1249  for (;; ptr++) Line 1442  for (;; ptr++)
1442    
1443        if (c == '\\')        if (c == '\\')
1444          {          {
1445          oldptr = ptr;          tempptr = ptr;
1446          c = check_escape(&ptr, errorptr, *brackets, options, FALSE);          c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1447          if (c < 0) { ptr = oldptr; break; }          if (c < 0) { ptr = tempptr; break; }
1448          }          }
1449    
1450        /* Ordinary character or single-char escape */        /* Ordinary character or single-char escape */
# Line 1262  for (;; ptr++) Line 1455  for (;; ptr++)
1455    
1456      /* This "while" is the end of the "do" above. */      /* This "while" is the end of the "do" above. */
1457    
1458      while (length < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);      while (length < 255 && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0);
1459    
1460      /* Compute the length and set it in the data vector, and advance to      /* Compute the length and set it in the data vector, and advance to
1461      the next state. */      the next state. */
1462    
1463      previous[1] = length;      previous[1] = length;
1464      ptr--;      if (length < 255) ptr--;
1465      break;      break;
1466      }      }
1467    }                   /* end of big loop */    }                   /* end of big loop */
# Line 1292  return FALSE; Line 1485  return FALSE;
1485  /* On entry, ptr is pointing past the bracket character, but on return  /* On entry, ptr is pointing past the bracket character, but on return
1486  it points to the closing bracket, or vertical bar, or end of string.  it points to the closing bracket, or vertical bar, or end of string.
1487  The code variable is pointing at the byte into which the BRA operator has been  The code variable is pointing at the byte into which the BRA operator has been
1488  stored.  stored. If the ims options are changed at the start (for a (?ims: group) or
1489    during any branch, we need to insert an OP_OPT item at the start of every
1490    following branch to ensure they get set correctly at run time, and also pass
1491    the new options into every subsequent branch compile.
1492    
1493  Arguments:  Arguments:
1494    options   the option bits    options     the option bits
1495    brackets  -> int containing the number of extracting brackets used    optchanged  new ims options to set as if (?ims) were at the start, or -1
1496    codeptr   -> the address of the current code pointer                 for no change
1497    ptrptr    -> the address of the current pattern pointer    brackets    -> int containing the number of extracting brackets used
1498    errorptr  -> pointer to error message    codeptr     -> the address of the current code pointer
1499      ptrptr      -> the address of the current pattern pointer
1500      errorptr    -> pointer to error message
1501      lookbehind  TRUE if this is a lookbehind assertion
1502      condref     > 0 for OP_CREF setting at start of conditional group
1503      cd          points to the data block with tables pointers
1504    
1505  Returns:    TRUE on success  Returns:      TRUE on success
1506  */  */
1507    
1508  static BOOL  static BOOL
1509  compile_regex(int options, int *brackets, uschar **codeptr, uschar **ptrptr,  compile_regex(int options, int optchanged, int *brackets, uschar **codeptr,
1510    char **errorptr)    const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref,
1511      compile_data *cd)
1512  {  {
1513  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
1514  uschar *code = *codeptr;  uschar *code = *codeptr;
1515    uschar *last_branch = code;
1516  uschar *start_bracket = code;  uschar *start_bracket = code;
1517    uschar *reverse_count = NULL;
1518    int oldoptions = options & PCRE_IMS;
1519    
1520    code += 3;
1521    
1522    /* At the start of a reference-based conditional group, insert the reference
1523    number as an OP_CREF item. */
1524    
1525    if (condref > 0)
1526      {
1527      *code++ = OP_CREF;
1528      *code++ = condref;
1529      }
1530    
1531    /* Loop for each alternative branch */
1532    
1533  for (;;)  for (;;)
1534    {    {
1535    int length;    int length;
   uschar *last_branch = code;  
1536    
1537    code += 3;    /* Handle change of options */
1538    if (!compile_branch(options, brackets, &code, &ptr, errorptr))  
1539      if (optchanged >= 0)
1540        {
1541        *code++ = OP_OPT;
1542        *code++ = optchanged;
1543        options = (options & ~PCRE_IMS) | optchanged;
1544        }
1545    
1546      /* Set up dummy OP_REVERSE if lookbehind assertion */
1547    
1548      if (lookbehind)
1549        {
1550        *code++ = OP_REVERSE;
1551        reverse_count = code;
1552        *code++ = 0;
1553        *code++ = 0;
1554        }
1555    
1556      /* Now compile the branch */
1557    
1558      if (!compile_branch(options,brackets,&code,&ptr,errorptr,&optchanged,cd))
1559      {      {
1560      *ptrptr = ptr;      *ptrptr = ptr;
1561      return FALSE;      return FALSE;
# Line 1330  for (;;) Line 1567  for (;;)
1567    last_branch[1] = length >> 8;    last_branch[1] = length >> 8;
1568    last_branch[2] = length & 255;    last_branch[2] = length & 255;
1569    
1570      /* If lookbehind, check that this branch matches a fixed-length string,
1571      and put the length into the OP_REVERSE item. Temporarily mark the end of
1572      the branch with OP_END. */
1573    
1574      if (lookbehind)
1575        {
1576        *code = OP_END;
1577        length = find_fixedlength(last_branch);
1578        DPRINTF(("fixed length = %d\n", length));
1579        if (length < 0)
1580          {
1581          *errorptr = ERR25;
1582          *ptrptr = ptr;
1583          return FALSE;
1584          }
1585        reverse_count[0] = (length >> 8);
1586        reverse_count[1] = length & 255;
1587        }
1588    
1589    /* Reached end of expression, either ')' or end of pattern. Insert a    /* Reached end of expression, either ')' or end of pattern. Insert a
1590    terminating ket and the length of the whole bracketed item, and return,    terminating ket and the length of the whole bracketed item, and return,
1591    leaving the pointer at the terminating char. */    leaving the pointer at the terminating char. If any of the ims options
1592      were changed inside the group, compile a resetting op-code following. */
1593    
1594    if (*ptr != '|')    if (*ptr != '|')
1595      {      {
# Line 1340  for (;;) Line 1597  for (;;)
1597      *code++ = OP_KET;      *code++ = OP_KET;
1598      *code++ = length >> 8;      *code++ = length >> 8;
1599      *code++ = length & 255;      *code++ = length & 255;
1600        if (optchanged >= 0)
1601          {
1602          *code++ = OP_OPT;
1603          *code++ = oldoptions;
1604          }
1605      *codeptr = code;      *codeptr = code;
1606      *ptrptr = ptr;      *ptrptr = ptr;
1607      return TRUE;      return TRUE;
# Line 1348  for (;;) Line 1610  for (;;)
1610    /* Another branch follows; insert an "or" node and advance the pointer. */    /* Another branch follows; insert an "or" node and advance the pointer. */
1611    
1612    *code = OP_ALT;    *code = OP_ALT;
1613      last_branch = code;
1614      code += 3;
1615    ptr++;    ptr++;
1616    }    }
1617  /* Control never reaches here */  /* Control never reaches here */
# Line 1355  for (;;) Line 1619  for (;;)
1619    
1620    
1621    
1622    
1623    /*************************************************
1624    *      Find first significant op code            *
1625    *************************************************/
1626    
1627    /* This is called by several functions that scan a compiled expression looking
1628    for a fixed first character, or an anchoring op code etc. It skips over things
1629    that do not influence this. For one application, a change of caseless option is
1630    important.
1631    
1632    Arguments:
1633      code       pointer to the start of the group
1634      options    pointer to external options
1635      optbit     the option bit whose changing is significant, or
1636                 zero if none are
1637      optstop    TRUE to return on option change, otherwise change the options
1638                   value and continue
1639    
1640    Returns:     pointer to the first significant opcode
1641    */
1642    
1643    static const uschar*
1644    first_significant_code(const uschar *code, int *options, int optbit,
1645      BOOL optstop)
1646    {
1647    for (;;)
1648      {
1649      switch ((int)*code)
1650        {
1651        case OP_OPT:
1652        if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit))
1653          {
1654          if (optstop) return code;
1655          *options = (int)code[1];
1656          }
1657        code += 2;
1658        break;
1659    
1660        case OP_CREF:
1661        code += 2;
1662        break;
1663    
1664        case OP_ASSERT_NOT:
1665        case OP_ASSERTBACK:
1666        case OP_ASSERTBACK_NOT:
1667        do code += (code[1] << 8) + code[2]; while (*code == OP_ALT);
1668        code += 3;
1669        break;
1670    
1671        default:
1672        return code;
1673        }
1674      }
1675    /* Control never reaches here */
1676    }
1677    
1678    
1679    
1680    
1681  /*************************************************  /*************************************************
1682  *          Check for anchored expression         *  *          Check for anchored expression         *
1683  *************************************************/  *************************************************/
# Line 1369  A branch is also implicitly anchored if Line 1692  A branch is also implicitly anchored if
1692  the rest of the pattern at all possible matching points, so there is no point  the rest of the pattern at all possible matching points, so there is no point
1693  trying them again.  trying them again.
1694    
1695  Argument:  points to start of expression (the bracket)  Arguments:
1696  Returns:   TRUE or FALSE    code       points to start of expression (the bracket)
1697      options    points to the options setting
1698    
1699    Returns:     TRUE or FALSE
1700  */  */
1701    
1702  static BOOL  static BOOL
1703  is_anchored(register uschar *code, BOOL multiline)  is_anchored(register const uschar *code, int *options)
1704  {  {
1705  do {  do {
1706     int op = (int)code[3];     const uschar *scode = first_significant_code(code + 3, options,
1707     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE)       PCRE_MULTILINE, FALSE);
1708       { if (!is_anchored(code+3, multiline)) return FALSE; }     register int op = *scode;
1709       if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
1710         { if (!is_anchored(scode, options)) return FALSE; }
1711     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)
1712       { if (code[4] != OP_ANY) return FALSE; }       { if (scode[1] != OP_ANY) return FALSE; }
1713     else if (op != OP_SOD && (multiline || op != OP_CIRC)) return FALSE;     else if (op != OP_SOD &&
1714               ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))
1715         return FALSE;
1716     code += (code[1] << 8) + code[2];     code += (code[1] << 8) + code[2];
1717     }     }
1718  while (*code == OP_ALT);  while (*code == OP_ALT);
# Line 1403  Returns:   TRUE or FALSE Line 1733  Returns:   TRUE or FALSE
1733  */  */
1734    
1735  static BOOL  static BOOL
1736  is_startline(uschar *code)  is_startline(const uschar *code)
1737  {  {
1738  do {  do {
1739     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)     const uschar *scode = first_significant_code(code + 3, NULL, 0, FALSE);
1740       { if (!is_startline(code+3)) return FALSE; }     register int op = *scode;
1741     else if (code[3] != OP_CIRC) return FALSE;     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
1742         { if (!is_startline(scode)) return FALSE; }
1743       else if (op != OP_CIRC) return FALSE;
1744     code += (code[1] << 8) + code[2];     code += (code[1] << 8) + code[2];
1745     }     }
1746  while (*code == OP_ALT);  while (*code == OP_ALT);
# Line 1427  Consider each alternative branch. If the Line 1759  Consider each alternative branch. If the
1759  a bracket all of whose alternatives start with the same char (recurse ad lib),  a bracket all of whose alternatives start with the same char (recurse ad lib),
1760  then we return that char, otherwise -1.  then we return that char, otherwise -1.
1761    
1762  Argument:  points to start of expression (the bracket)  Arguments:
1763  Returns:   -1 or the fixed first char    code       points to start of expression (the bracket)
1764      options    pointer to the options (used to check casing changes)
1765    
1766    Returns:     -1 or the fixed first char
1767  */  */
1768    
1769  static int  static int
1770  find_firstchar(uschar *code)  find_firstchar(const uschar *code, int *options)
1771  {  {
1772  register int c = -1;  register int c = -1;
1773  do  do {
1774    {     int d;
1775    register int charoffset = 4;     const uschar *scode = first_significant_code(code + 3, options,
1776         PCRE_CASELESS, TRUE);
1777    if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)     register int op = *scode;
1778      {  
1779      register int d;     if (op >= OP_BRA) op = OP_BRA;
1780      if ((d = find_firstchar(code+3)) < 0) return -1;  
1781      if (c < 0) c = d; else if (c != d) return -1;     switch(op)
1782      }       {
1783         default:
1784    else switch(code[3])       return -1;
1785      {  
1786      default:       case OP_BRA:
1787      return -1;       case OP_ASSERT:
1788         case OP_ONCE:
1789      case OP_EXACT:       /* Fall through */       case OP_COND:
1790      charoffset++;       if ((d = find_firstchar(scode, options)) < 0) return -1;
1791         if (c < 0) c = d; else if (c != d) return -1;
1792      case OP_CHARS:       /* Fall through */       break;
1793      charoffset++;  
1794         case OP_EXACT:       /* Fall through */
1795         scode++;
1796    
1797         case OP_CHARS:       /* Fall through */
1798         scode++;
1799    
1800         case OP_PLUS:
1801         case OP_MINPLUS:
1802         if (c < 0) c = scode[1]; else if (c != scode[1]) return -1;
1803         break;
1804         }
1805    
1806      case OP_PLUS:     code += (code[1] << 8) + code[2];
1807      case OP_MINPLUS:     }
     if (c < 0) c = code[charoffset]; else if (c != code[charoffset]) return -1;  
     break;  
     }  
   code += (code[1] << 8) + code[2];  
   }  
1808  while (*code == OP_ALT);  while (*code == OP_ALT);
1809  return c;  return c;
1810  }  }
1811    
1812    
1813    
1814    
1815    
1816  /*************************************************  /*************************************************
1817  *        Compile a Regular Expression            *  *        Compile a Regular Expression            *
1818  *************************************************/  *************************************************/
# Line 1482  Arguments: Line 1825  Arguments:
1825    options      various option bits    options      various option bits
1826    errorptr     pointer to pointer to error text    errorptr     pointer to pointer to error text
1827    erroroffset  ptr offset in pattern where error was detected    erroroffset  ptr offset in pattern where error was detected
1828      tables       pointer to character tables or NULL
1829    
1830  Returns:       pointer to compiled data block, or NULL on error,  Returns:       pointer to compiled data block, or NULL on error,
1831                 with errorptr and erroroffset set                 with errorptr and erroroffset set
1832  */  */
1833    
1834  pcre *  pcre *
1835  pcre_compile(const char *pattern, int options, char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
1836    int *erroroffset)    int *erroroffset, const unsigned char *tables)
1837  {  {
1838  real_pcre *re;  real_pcre *re;
 int spaces = 0;  
1839  int length = 3;      /* For initial BRA plus length */  int length = 3;      /* For initial BRA plus length */
1840  int runlength;  int runlength;
1841  int c, size;  int c, size;
1842  int bracount = 0;  int bracount = 0;
 int brastack[200];  
 int brastackptr = 0;  
1843  int top_backref = 0;  int top_backref = 0;
1844  uschar *code, *ptr;  int branch_extra = 0;
1845    int branch_newextra;
1846    unsigned int brastackptr = 0;
1847    uschar *code;
1848    const uschar *ptr;
1849    compile_data compile_block;
1850    int brastack[BRASTACK_SIZE];
1851    uschar bralenstack[BRASTACK_SIZE];
1852    
1853  #ifdef DEBUG  #ifdef DEBUG
1854  uschar *code_base, *code_end;  uschar *code_base, *code_end;
# Line 1527  if ((options & ~PUBLIC_OPTIONS) != 0) Line 1875  if ((options & ~PUBLIC_OPTIONS) != 0)
1875    return NULL;    return NULL;
1876    }    }
1877    
1878  #ifdef DEBUG  /* Set up pointers to the individual character tables */
1879  printf("------------------------------------------------------------------\n");  
1880  printf("%s\n", pattern);  if (tables == NULL) tables = pcre_default_tables;
1881  #endif  compile_block.lcc = tables + lcc_offset;
1882    compile_block.fcc = tables + fcc_offset;
1883    compile_block.cbits = tables + cbits_offset;
1884    compile_block.ctypes = tables + ctypes_offset;
1885    
1886    /* Reflect pattern for debugging output */
1887    
1888    DPRINTF(("------------------------------------------------------------------\n"));
1889    DPRINTF(("%s\n", pattern));
1890    
1891  /* The first thing to do is to make a pass over the pattern to compute the  /* The first thing to do is to make a pass over the pattern to compute the
1892  amount of store required to hold the compiled code. This does not have to be  amount of store required to hold the compiled code. This does not have to be
# Line 1539  internal flag settings. Make an attempt Line 1895  internal flag settings. Make an attempt
1895  if an "extended" flag setting appears late in the pattern. We can't be so  if an "extended" flag setting appears late in the pattern. We can't be so
1896  clever for #-comments. */  clever for #-comments. */
1897    
1898  ptr = (uschar *)(pattern - 1);  ptr = (const uschar *)(pattern - 1);
1899  while ((c = *(++ptr)) != 0)  while ((c = *(++ptr)) != 0)
1900    {    {
1901    int min, max;    int min, max;
1902    int class_charcount;    int class_charcount;
1903    
1904    if ((pcre_ctypes[c] & ctype_space) != 0)    if ((options & PCRE_EXTENDED) != 0)
     {  
     if ((options & PCRE_EXTENDED) != 0) continue;  
     spaces++;  
     }  
   
   if (c == '#' && (options & PCRE_EXTENDED) != 0)  
1905      {      {
1906      while ((c = *(++ptr)) != 0 && c != '\n');      if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
1907      continue;      if (c == '#')
1908          {
1909          while ((c = *(++ptr)) != 0 && c != '\n');
1910          continue;
1911          }
1912      }      }
1913    
1914    switch(c)    switch(c)
# Line 1566  while ((c = *(++ptr)) != 0) Line 1920  while ((c = *(++ptr)) != 0)
1920    
1921      case '\\':      case '\\':
1922        {        {
1923        uschar *save_ptr = ptr;        const uschar *save_ptr = ptr;
1924        c = check_escape(&ptr, errorptr, bracount, options, FALSE);        c = check_escape(&ptr, errorptr, bracount, options, FALSE, &compile_block);
1925        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1926        if (c >= 0)        if (c >= 0)
1927          {          {
# Line 1587  while ((c = *(++ptr)) != 0) Line 1941  while ((c = *(++ptr)) != 0)
1941        int refnum = -c - ESC_REF;        int refnum = -c - ESC_REF;
1942        if (refnum > top_backref) top_backref = refnum;        if (refnum > top_backref) top_backref = refnum;
1943        length++;   /* For single back reference */        length++;   /* For single back reference */
1944        if (ptr[1] == '{' && is_counted_repeat(ptr+2))        if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
1945          {          {
1946          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
1947          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1948          if ((min == 0 && (max == 1 || max == -1)) ||          if ((min == 0 && (max == 1 || max == -1)) ||
1949            (min == 1 && max == -1))            (min == 1 && max == -1))
# Line 1613  while ((c = *(++ptr)) != 0) Line 1967  while ((c = *(++ptr)) != 0)
1967      or back reference. */      or back reference. */
1968    
1969      case '{':      case '{':
1970      if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;      if (!is_counted_repeat(ptr+1, &compile_block)) goto NORMAL_CHAR;
1971      ptr = read_repeat_counts(ptr+1, &min, &max, errorptr);      ptr = read_repeat_counts(ptr+1, &min, &max, errorptr, &compile_block);
1972      if (*errorptr != NULL) goto PCRE_ERROR_RETURN;      if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1973      if ((min == 0 && (max == 1 || max == -1)) ||      if ((min == 0 && (max == 1 || max == -1)) ||
1974        (min == 1 && max == -1))        (min == 1 && max == -1))
# Line 1628  while ((c = *(++ptr)) != 0) Line 1982  while ((c = *(++ptr)) != 0)
1982      if (ptr[1] == '?') ptr++;      if (ptr[1] == '?') ptr++;
1983      continue;      continue;
1984    
1985      /* An alternation contains an offset to the next branch or ket. */      /* An alternation contains an offset to the next branch or ket. If any ims
1986        options changed in the previous branch(es), and/or if we are in a
1987        lookbehind assertion, extra space will be needed at the start of the
1988        branch. This is handled by branch_extra. */
1989    
1990      case '|':      case '|':
1991      length += 3;      length += 3 + branch_extra;
1992      continue;      continue;
1993    
1994      /* A character class uses 33 characters. Don't worry about character types      /* A character class uses 33 characters. Don't worry about character types
# Line 1645  while ((c = *(++ptr)) != 0) Line 2003  while ((c = *(++ptr)) != 0)
2003        {        {
2004        if (*ptr == '\\')        if (*ptr == '\\')
2005          {          {
2006          int c = check_escape(&ptr, errorptr, bracount, options, TRUE);          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE,
2007              &compile_block);
2008          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2009          if (-c == ESC_b) class_charcount++; else class_charcount = 10;          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
2010          }          }
2011        else class_charcount++;        else class_charcount++;
2012        ptr++;        ptr++;
# Line 1662  while ((c = *(++ptr)) != 0) Line 2021  while ((c = *(++ptr)) != 0)
2021    
2022        /* A repeat needs either 1 or 5 bytes. */        /* A repeat needs either 1 or 5 bytes. */
2023    
2024        if (ptr[1] == '{' && is_counted_repeat(ptr+2))        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2025          {          {
2026          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
2027          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2028          if ((min == 0 && (max == 1 || max == -1)) ||          if ((min == 0 && (max == 1 || max == -1)) ||
2029            (min == 1 && max == -1))            (min == 1 && max == -1))
# Line 1675  while ((c = *(++ptr)) != 0) Line 2034  while ((c = *(++ptr)) != 0)
2034        }        }
2035      continue;      continue;
2036    
2037      /* Brackets may be genuine groups or special things */      /* Brackets may be genuine groups or special things */
2038    
2039        case '(':
2040        branch_newextra = 0;
2041    
2042        /* Handle special forms of bracket, which all start (? */
2043    
2044        if (ptr[1] == '?')
2045          {
2046          int set, unset;
2047          int *optset;
2048    
2049          switch (c = ptr[2])
2050            {
2051            /* Skip over comments entirely */
2052            case '#':
2053            ptr += 3;
2054            while (*ptr != 0 && *ptr != ')') ptr++;
2055            if (*ptr == 0)
2056              {
2057              *errorptr = ERR18;
2058              goto PCRE_ERROR_RETURN;
2059              }
2060            continue;
2061    
2062            /* Non-referencing groups and lookaheads just move the pointer on, and
2063            then behave like a non-special bracket, except that they don't increment
2064            the count of extracting brackets. Ditto for the "once only" bracket,
2065            which is in Perl from version 5.005. */
2066    
2067            case ':':
2068            case '=':
2069            case '!':
2070            case '>':
2071            ptr += 2;
2072            break;
2073    
2074            /* Lookbehinds are in Perl from version 5.005 */
2075    
2076            case '<':
2077            if (ptr[3] == '=' || ptr[3] == '!')
2078              {
2079              ptr += 3;
2080              branch_newextra = 3;
2081              length += 3;         /* For the first branch */
2082              break;
2083              }
2084            *errorptr = ERR24;
2085            goto PCRE_ERROR_RETURN;
2086    
2087            /* Conditionals are in Perl from version 5.005. The bracket must either
2088            be followed by a number (for bracket reference) or by an assertion
2089            group. */
2090    
2091            case '(':
2092            if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0)
2093              {
2094              ptr += 4;
2095              length += 2;
2096              while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++;
2097              if (*ptr != ')')
2098                {
2099                *errorptr = ERR26;
2100                goto PCRE_ERROR_RETURN;
2101                }
2102              }
2103            else   /* An assertion must follow */
2104              {
2105              ptr++;   /* Can treat like ':' as far as spacing is concerned */
2106    
2107              if (ptr[2] != '?' || strchr("=!<", ptr[3]) == NULL)
2108                {
2109                ptr += 2;    /* To get right offset in message */
2110                *errorptr = ERR28;
2111                goto PCRE_ERROR_RETURN;
2112                }
2113              }
2114            break;
2115    
2116            /* Else loop checking valid options until ) is met. Anything else is an
2117            error. If we are without any brackets, i.e. at top level, the settings
2118            act as if specified in the options, so massage the options immediately.
2119            This is for backward compatibility with Perl 5.004. */
2120    
2121            default:
2122            set = unset = 0;
2123            optset = &set;
2124            ptr += 2;
2125    
2126            for (;; ptr++)
2127              {
2128              c = *ptr;
2129              switch (c)
2130                {
2131                case 'i':
2132                *optset |= PCRE_CASELESS;
2133                continue;
2134    
2135                case 'm':
2136                *optset |= PCRE_MULTILINE;
2137                continue;
2138    
2139      case '(':              case 's':
2140                *optset |= PCRE_DOTALL;
2141                continue;
2142    
2143      /* Handle special forms of bracket, which all start (? */              case 'x':
2144                *optset |= PCRE_EXTENDED;
2145                continue;
2146    
2147      if (ptr[1] == '?') switch (c = ptr[2])              case 'X':
2148        {              *optset |= PCRE_EXTRA;
2149        /* Skip over comments entirely */              continue;
       case '#':  
       ptr += 3;  
       while (*ptr != 0 && *ptr != ')') ptr++;  
       if (*ptr == 0)  
         {  
         *errorptr = ERR18;  
         goto PCRE_ERROR_RETURN;  
         }  
       continue;  
2150    
2151        /* Non-referencing groups and lookaheads just move the pointer on, and              case 'U':
2152        then behave like a non-special bracket, except that they don't increment              *optset |= PCRE_UNGREEDY;
2153        the count of extracting brackets. */              continue;
   
       case ':':  
       case '=':  
       case '!':  
       ptr += 2;  
       break;  
2154    
2155        /* Ditto for the "once only" bracket, allowed only if the extra bit              case '-':
2156        is set. */              optset = &unset;
2157                continue;
2158    
2159        case '>':              /* A termination by ')' indicates an options-setting-only item;
2160        if ((options & PCRE_EXTRA) != 0)              this is global at top level; otherwise nothing is done here and
2161          {              it is handled during the compiling process on a per-bracket-group
2162          ptr += 2;              basis. */
         break;  
         }  
       /* Else fall through */  
2163    
2164        /* Else loop setting valid options until ) is met. Anything else is an              case ')':
2165        error. */              if (brastackptr == 0)
2166                  {
2167                  options = (options | set) & (~unset);
2168                  set = unset = 0;     /* To save length */
2169                  }
2170                /* Fall through */
2171    
2172        default:              /* A termination by ':' indicates the start of a nested group with
2173        ptr += 2;              the given options set. This is again handled at compile time, but
2174        for (;; ptr++)              we must allow for compiled space if any of the ims options are
2175          {              set. We also have to allow for resetting space at the end of
2176          if ((c = *ptr) == 'i')              the group, which is why 4 is added to the length and not just 2.
2177            {              If there are several changes of options within the same group, this
2178            options |= PCRE_CASELESS;              will lead to an over-estimate on the length, but this shouldn't
2179            continue;              matter very much. We also have to allow for resetting options at
2180            }              the start of any alternations, which we do by setting
2181          else if ((c = *ptr) == 'm')              branch_newextra to 2. */
2182            {  
2183            options |= PCRE_MULTILINE;              case ':':
2184            continue;              if (((set|unset) & PCRE_IMS) != 0)
2185            }                {
2186          else if (c == 's')                length += 4;
2187            {                branch_newextra = 2;
2188            options |= PCRE_DOTALL;                }
2189            continue;              goto END_OPTIONS;
2190    
2191                /* Unrecognized option character */
2192    
2193                default:
2194                *errorptr = ERR12;
2195                goto PCRE_ERROR_RETURN;
2196                }
2197            }            }
2198          else if (c == 'x')  
2199            /* If we hit a closing bracket, that's it - this is a freestanding
2200            option-setting. We need to ensure that branch_extra is updated if
2201            necessary. The only values branch_newextra can have here are 0 or 2.
2202            If the value is 2, then branch_extra must either be 2 or 5, depending
2203            on whether this is a lookbehind group or not. */
2204    
2205            END_OPTIONS:
2206            if (c == ')')
2207            {            {
2208            options |= PCRE_EXTENDED;            if (branch_newextra == 2 && (branch_extra == 0 || branch_extra == 3))
2209            length -= spaces;          /* Already counted spaces */              branch_extra += branch_newextra;
2210            continue;            continue;
2211            }            }
         else if (c == ')') break;  
2212    
2213          *errorptr = ERR12;          /* If options were terminated by ':' control comes here. Fall through
2214          goto PCRE_ERROR_RETURN;          to handle the group below. */
2215          }          }
       continue;                      /* End of this bracket handling */  
2216        }        }
2217    
2218      /* Extracting brackets must be counted so we can process escapes in a      /* Extracting brackets must be counted so we can process escapes in a
# Line 1757  while ((c = *(++ptr)) != 0) Line 2221  while ((c = *(++ptr)) != 0)
2221      else bracount++;      else bracount++;
2222    
2223      /* Non-special forms of bracket. Save length for computing whole length      /* Non-special forms of bracket. Save length for computing whole length
2224      at end if there's a repeat that requires duplication of the group. */      at end if there's a repeat that requires duplication of the group. Also
2225        save the current value of branch_extra, and start the new group with
2226        the new value. If non-zero, this will either be 2 for a (?imsx: group, or 3
2227        for a lookbehind assertion. */
2228    
2229      if (brastackptr >= sizeof(brastack)/sizeof(int))      if (brastackptr >= sizeof(brastack)/sizeof(int))
2230        {        {
# Line 1765  while ((c = *(++ptr)) != 0) Line 2232  while ((c = *(++ptr)) != 0)
2232        goto PCRE_ERROR_RETURN;        goto PCRE_ERROR_RETURN;
2233        }        }
2234    
2235        bralenstack[brastackptr] = branch_extra;
2236        branch_extra = branch_newextra;
2237    
2238      brastack[brastackptr++] = length;      brastack[brastackptr++] = length;
2239      length += 3;      length += 3;
2240      continue;      continue;
2241    
2242      /* Handle ket. Look for subsequent max/min; for certain sets of values we      /* Handle ket. Look for subsequent max/min; for certain sets of values we
2243      have to replicate this bracket up to that many times. */      have to replicate this bracket up to that many times. If brastackptr is
2244        0 this is an unmatched bracket which will generate an error, but take care
2245        not to try to access brastack[-1] when computing the length and restoring
2246        the branch_extra value. */
2247    
2248      case ')':      case ')':
2249      length += 3;      length += 3;
2250        {        {
2251        int min = 1;        int minval = 1;
2252        int max = 1;        int maxval = 1;
2253        int duplength = length - brastack[--brastackptr];        int duplength;
2254    
2255          if (brastackptr > 0)
2256            {
2257            duplength = length - brastack[--brastackptr];
2258            branch_extra = bralenstack[brastackptr];
2259            }
2260          else duplength = 0;
2261    
2262        /* Leave ptr at the final char; for read_repeat_counts this happens        /* Leave ptr at the final char; for read_repeat_counts this happens
2263        automatically; for the others we need an increment. */        automatically; for the others we need an increment. */
2264    
2265        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2, &compile_block))
2266          {          {
2267          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr,
2268              &compile_block);
2269          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2270          }          }
2271        else if (c == '*') { min = 0; max = -1; ptr++; }        else if (c == '*') { minval = 0; maxval = -1; ptr++; }
2272        else if (c == '+') { max = -1; ptr++; }        else if (c == '+') { maxval = -1; ptr++; }
2273        else if (c == '?') { min = 0; ptr++; }        else if (c == '?') { minval = 0; ptr++; }
2274    
2275        /* If there is a minimum > 1 we have to replicate up to min-1 times; if        /* If there is a minimum > 1 we have to replicate up to minval-1 times;
2276        there is a limited maximum we have to replicate up to max-1 times and        if there is a limited maximum we have to replicate up to maxval-1 times
2277        allow for a BRAZERO item before each optional copy, as we also have to        and allow for a BRAZERO item before each optional copy, as we also have
2278        do before the first copy if the minimum is zero. */        to do before the first copy if the minimum is zero. */
2279    
2280        if (min == 0) length++;        if (minval == 0) length++;
2281          else if (min > 1) length += (min - 1) * duplength;          else if (minval > 1) length += (minval - 1) * duplength;
2282        if (max > min) length += (max - min) * (duplength + 1);        if (maxval > minval) length += (maxval - minval) * (duplength + 1);
2283        }        }
   
2284      continue;      continue;
2285    
2286      /* Non-special character. For a run of such characters the length required      /* Non-special character. For a run of such characters the length required
# Line 1814  while ((c = *(++ptr)) != 0) Line 2294  while ((c = *(++ptr)) != 0)
2294      runlength = 0;      runlength = 0;
2295      do      do
2296        {        {
2297        if ((pcre_ctypes[c] & ctype_space) != 0)        if ((options & PCRE_EXTENDED) != 0)
         {  
         if ((options & PCRE_EXTENDED) != 0) continue;  
         spaces++;  
         }  
   
       if (c == '#' && (options & PCRE_EXTENDED) != 0)  
2298          {          {
2299          while ((c = *(++ptr)) != 0 && c != '\n');          if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2300          continue;          if (c == '#')
2301              {
2302              while ((c = *(++ptr)) != 0 && c != '\n');
2303              continue;
2304              }
2305          }          }
2306    
2307        /* Backslash may introduce a data char or a metacharacter; stop the        /* Backslash may introduce a data char or a metacharacter; stop the
# Line 1831  while ((c = *(++ptr)) != 0) Line 2309  while ((c = *(++ptr)) != 0)
2309    
2310        if (c == '\\')        if (c == '\\')
2311          {          {
2312          uschar *saveptr = ptr;          const uschar *saveptr = ptr;
2313          c = check_escape(&ptr, errorptr, bracount, options, FALSE);          c = check_escape(&ptr, errorptr, bracount, options, FALSE,
2314              &compile_block);
2315          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2316          if (c < 0) { ptr = saveptr; break; }          if (c < 0) { ptr = saveptr; break; }
2317          }          }
# Line 1844  while ((c = *(++ptr)) != 0) Line 2323  while ((c = *(++ptr)) != 0)
2323    
2324      /* This "while" is the end of the "do" above. */      /* This "while" is the end of the "do" above. */
2325    
2326      while (runlength < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);      while (runlength < 255 &&
2327          (compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0);
2328    
2329      ptr--;      ptr--;
2330      length += runlength;      length += runlength;
# Line 1861  if (length > 65539) Line 2341  if (length > 65539)
2341    }    }
2342    
2343  /* Compute the size of data block needed and get it, either from malloc or  /* Compute the size of data block needed and get it, either from malloc or
2344  externally provided function. Put in the magic number and the options. */  externally provided function. We specify "code[0]" in the offsetof() expression
2345    rather than just "code", because it has been reported that one broken compiler
2346    fails on "code" because it is also an independent variable. It should make no
2347    difference to the value of the offsetof(). */
2348    
2349  size = length + offsetof(real_pcre, code);  size = length + offsetof(real_pcre, code[0]);
2350  re = (real_pcre *)(pcre_malloc)(size);  re = (real_pcre *)(pcre_malloc)(size);
2351    
2352  if (re == NULL)  if (re == NULL)
# Line 1872  if (re == NULL) Line 2355  if (re == NULL)
2355    return NULL;    return NULL;
2356    }    }
2357    
2358    /* Put in the magic number and the options. */
2359    
2360  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
2361  re->options = options;  re->options = options;
2362    re->tables = tables;
2363    
2364  /* Set up a starting, non-extracting bracket, then compile the expression. On  /* Set up a starting, non-extracting bracket, then compile the expression. On
2365  error, *errorptr will be set non-NULL, so we don't need to look at the result  error, *errorptr will be set non-NULL, so we don't need to look at the result
2366  of the function here. */  of the function here. */
2367    
2368  ptr = (uschar *)pattern;  ptr = (const uschar *)pattern;
2369  code = re->code;  code = re->code;
2370  *code = OP_BRA;  *code = OP_BRA;
2371  bracount = 0;  bracount = 0;
2372  (void)compile_regex(options, &bracount, &code, &ptr, errorptr);  (void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1,
2373      &compile_block);
2374  re->top_bracket = bracount;  re->top_bracket = bracount;
2375  re->top_backref = top_backref;  re->top_backref = top_backref;
2376    
# Line 1900  if debugging, leave the test till after Line 2387  if debugging, leave the test till after
2387  if (code - re->code > length) *errorptr = ERR23;  if (code - re->code > length) *errorptr = ERR23;
2388  #endif  #endif
2389    
2390    /* Give an error if there's a back reference to a non-existent capturing
2391    subpattern. */
2392    
2393    if (top_backref > re->top_bracket) *errorptr = ERR15;
2394    
2395  /* Failed to compile */  /* Failed to compile */
2396    
2397  if (*errorptr != NULL)  if (*errorptr != NULL)
2398    {    {
2399    (pcre_free)(re);    (pcre_free)(re);
2400    PCRE_ERROR_RETURN:    PCRE_ERROR_RETURN:
2401    *erroroffset = ptr - (uschar *)pattern;    *erroroffset = ptr - (const uschar *)pattern;
2402    return NULL;    return NULL;
2403    }    }
2404    
# Line 1918  to set the PCRE_STARTLINE flag if all br Line 2410  to set the PCRE_STARTLINE flag if all br
2410    
2411  if ((options & PCRE_ANCHORED) == 0)  if ((options & PCRE_ANCHORED) == 0)
2412    {    {
2413    if (is_anchored(re->code, (options & PCRE_MULTILINE) != 0))    int temp_options = options;
2414      if (is_anchored(re->code, &temp_options))
2415      re->options |= PCRE_ANCHORED;      re->options |= PCRE_ANCHORED;
2416    else    else
2417      {      {
2418      int c = find_firstchar(re->code);      int ch = find_firstchar(re->code, &temp_options);
2419      if (c >= 0)      if (ch >= 0)
2420        {        {
2421        re->first_char = c;        re->first_char = ch;
2422        re->options |= PCRE_FIRSTSET;        re->options |= PCRE_FIRSTSET;
2423        }        }
2424      else if (is_startline(re->code))      else if (is_startline(re->code))
# Line 1937  if ((options & PCRE_ANCHORED) == 0) Line 2430  if ((options & PCRE_ANCHORED) == 0)
2430    
2431  #ifdef DEBUG  #ifdef DEBUG
2432    
2433  printf("Length = %d top_bracket = %d top_backref=%d\n",  printf("Length = %d top_bracket = %d top_backref = %d\n",
2434    length, re->top_bracket, re->top_backref);    length, re->top_bracket, re->top_backref);
2435    
2436  if (re->options != 0)  if (re->options != 0)
2437    {    {
2438    printf("%s%s%s%s%s%s%s\n",    printf("%s%s%s%s%s%s%s%s\n",
2439      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
2440      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
2441      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
2442      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
2443      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
2444      ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",      ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
2445      ((re->options & PCRE_EXTRA) != 0)? "extra " : "");      ((re->options & PCRE_EXTRA) != 0)? "extra " : "",
2446        ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");
2447    }    }
2448    
2449  if ((re->options & PCRE_FIRSTSET) != 0)  if ((re->options & PCRE_FIRSTSET) != 0)
# Line 1975  while (code < code_end) Line 2469  while (code < code_end)
2469    
2470    else switch(*code)    else switch(*code)
2471      {      {
2472        case OP_OPT:
2473        printf(" %.2x %s", code[1], OP_names[*code]);
2474        code++;
2475        break;
2476    
2477        case OP_COND:
2478        printf("%3d Cond", (code[1] << 8) + code[2]);
2479        code += 2;
2480        break;
2481    
2482        case OP_CREF:
2483        printf(" %.2d %s", code[1], OP_names[*code]);
2484        code++;
2485        break;
2486    
2487      case OP_CHARS:      case OP_CHARS:
2488      charlength = *(++code);      charlength = *(++code);
2489      printf("%3d ", charlength);      printf("%3d ", charlength);
# Line 1988  while (code < code_end) Line 2497  while (code < code_end)
2497      case OP_KET:      case OP_KET:
2498      case OP_ASSERT:      case OP_ASSERT:
2499      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
2500        case OP_ASSERTBACK:
2501        case OP_ASSERTBACK_NOT:
2502      case OP_ONCE:      case OP_ONCE:
2503      printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);      printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
2504      code += 2;      code += 2;
2505      break;      break;
2506    
2507        case OP_REVERSE:
2508        printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
2509        code += 2;
2510        break;
2511    
2512      case OP_STAR:      case OP_STAR:
2513      case OP_MINSTAR:      case OP_MINSTAR:
2514      case OP_PLUS:      case OP_PLUS:
# Line 2017  while (code < code_end) Line 2533  while (code < code_end)
2533      case OP_MINUPTO:      case OP_MINUPTO:
2534      if (isprint(c = code[3])) printf("    %c{", c);      if (isprint(c = code[3])) printf("    %c{", c);
2535        else printf("    \\x%02x{", c);        else printf("    \\x%02x{", c);
2536      if (*code != OP_EXACT) printf(",");      if (*code != OP_EXACT) printf("0,");
2537      printf("%d}", (code[1] << 8) + code[2]);      printf("%d}", (code[1] << 8) + code[2]);
2538      if (*code == OP_MINUPTO) printf("?");      if (*code == OP_MINUPTO) printf("?");
2539      code += 3;      code += 3;
# Line 2062  while (code < code_end) Line 2578  while (code < code_end)
2578    
2579      case OP_REF:      case OP_REF:
2580      printf("    \\%d", *(++code));      printf("    \\%d", *(++code));
2581      break;      code ++;
2582        goto CLASS_REF_REPEAT;
2583    
2584      case OP_CLASS:      case OP_CLASS:
2585        {        {
2586        int i, min, max;        int i, min, max;
   
2587        code++;        code++;
2588        printf("    [");        printf("    [");
2589    
# Line 2092  while (code < code_end) Line 2608  while (code < code_end)
2608        printf("]");        printf("]");
2609        code += 32;        code += 32;
2610    
2611          CLASS_REF_REPEAT:
2612    
2613        switch(*code)        switch(*code)
2614          {          {
2615          case OP_CRSTAR:          case OP_CRSTAR:
# Line 2149  return (pcre *)re; Line 2667  return (pcre *)re;
2667    
2668    
2669  /*************************************************  /*************************************************
 *        Match a character type                  *  
 *************************************************/  
   
 /* Not used in all the places it might be as it's sometimes faster  
 to put the code inline.  
   
 Arguments:  
   type        the character type  
   c           the character  
   dotall      the dotall flag  
   
 Returns:      TRUE if character is of the type  
 */  
   
 static BOOL  
 match_type(int type, int c, BOOL dotall)  
 {  
   
 #ifdef DEBUG  
 if (isprint(c)) printf("matching subject %c against ", c);  
   else printf("matching subject \\x%02x against ", c);  
 printf("%s\n", OP_names[type]);  
 #endif  
   
 switch(type)  
   {  
   case OP_ANY:            return dotall || c != '\n';  
   case OP_NOT_DIGIT:      return (pcre_ctypes[c] & ctype_digit) == 0;  
   case OP_DIGIT:          return (pcre_ctypes[c] & ctype_digit) != 0;  
   case OP_NOT_WHITESPACE: return (pcre_ctypes[c] & ctype_space) == 0;  
   case OP_WHITESPACE:     return (pcre_ctypes[c] & ctype_space) != 0;  
   case OP_NOT_WORDCHAR:   return (pcre_ctypes[c] & ctype_word) == 0;  
   case OP_WORDCHAR:       return (pcre_ctypes[c] & ctype_word) != 0;  
   }  
 return FALSE;  
 }  
   
   
   
 /*************************************************  
2670  *          Match a back-reference                *  *          Match a back-reference                *
2671  *************************************************/  *************************************************/
2672    
2673  /* If a back reference hasn't been set, the match fails.  /* If a back reference hasn't been set, the length that is passed is greater
2674    than the number of characters left in the string, so the match fails.
2675    
2676  Arguments:  Arguments:
2677    number      reference number    offset      index into the offset vector
2678    eptr        points into the subject    eptr        points into the subject
2679    length      length to be matched    length      length to be matched
2680    md          points to match data block    md          points to match data block
2681      ims         the ims flags
2682    
2683  Returns:      TRUE if matched  Returns:      TRUE if matched
2684  */  */
2685    
2686  static BOOL  static BOOL
2687  match_ref(int number, register uschar *eptr, int length, match_data *md)  match_ref(int offset, register const uschar *eptr, int length, match_data *md,
2688      int ims)
2689  {  {
2690  uschar *p = md->start_subject + md->offset_vector[number];  const uschar *p = md->start_subject + md->offset_vector[offset];
2691    
2692  #ifdef DEBUG  #ifdef DEBUG
2693  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 2223  printf("\n"); Line 2704  printf("\n");
2704    
2705  /* Always fail if not enough characters left */  /* Always fail if not enough characters left */
2706    
2707  if (length > md->end_subject - p) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
2708    
2709  /* Separate the caseless case for speed */  /* Separate the caseless case for speed */
2710    
2711  if (md->caseless)  if ((ims & PCRE_CASELESS) != 0)
2712    { while (length-- > 0) if (pcre_lcc[*p++] != pcre_lcc[*eptr++]) return FALSE; }    {
2713      while (length-- > 0)
2714        if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
2715      }
2716  else  else
2717    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
2718    
# Line 2241  return TRUE; Line 2725  return TRUE;
2725  *         Match from current position            *  *         Match from current position            *
2726  *************************************************/  *************************************************/
2727    
2728  /* On entry ecode points to the first opcode, and eptr to the first character.  /* On entry ecode points to the first opcode, and eptr to the first character
2729    in the subject string, while eptrb holds the value of eptr at the start of the
2730    last bracketed group - used for breaking infinite loops matching zero-length
2731    strings.
2732    
2733  Arguments:  Arguments:
2734     eptr        pointer in subject     eptr        pointer in subject
2735     ecode       position in code     ecode       position in code
2736     offset_top  current top pointer     offset_top  current top pointer
2737     md          pointer to "static" info for the match     md          pointer to "static" info for the match
2738       ims         current /i, /m, and /s options
2739       condassert  TRUE if called to check a condition assertion
2740       eptrb       eptr at start of last bracket
2741    
2742  Returns:       TRUE if matched  Returns:       TRUE if matched
2743  */  */
2744    
2745  static BOOL  static BOOL
2746  match(register uschar *eptr, register uschar *ecode, int offset_top,  match(register const uschar *eptr, register const uschar *ecode,
2747    match_data *md)    int offset_top, match_data *md, int ims, BOOL condassert, const uschar *eptrb)
2748  {  {
2749    int original_ims = ims;   /* Save for resetting on ')' */
2750    
2751  for (;;)  for (;;)
2752    {    {
2753      int op = (int)*ecode;
2754    int min, max, ctype;    int min, max, ctype;
2755    register int i;    register int i;
2756    register int c;    register int c;
2757    BOOL minimize;    BOOL minimize = FALSE;
2758    
2759    /* Opening bracket. Check the alternative branches in turn, failing if none    /* Opening capturing bracket. If there is space in the offset vector, save
2760    match. We have to set the start offset if required and there is space    the current subject position in the working slot at the top of the vector. We
2761    in the offset vector so that it is available for subsequent back references    mustn't change the current values of the data slot, because they may be set
2762    if the bracket matches. However, if the bracket fails, we must put back the    from a previous iteration of this group, and be referred to by a reference
2763    previous value of both offsets in case they were set by a previous copy of    inside the group.
2764    the same bracket. Don't worry about setting the flag for the error case here;  
2765    that is handled in the code for KET. */    If the bracket fails to match, we need to restore this value and also the
2766      values of the final offsets, in case they were set by a previous iteration of
2767      the same bracket.
2768    
2769      If there isn't enough space in the offset vector, treat this as if it were a
2770      non-capturing bracket. Don't worry about setting the flag for the error case
2771      here; that is handled in the code for KET. */
2772    
2773    if ((int)*ecode >= OP_BRA)    if (op > OP_BRA)
2774      {      {
2775      int number = (*ecode - OP_BRA) << 1;      int number = op - OP_BRA;
2776      int save_offset1, save_offset2;      int offset = number << 1;
2777    
2778      #ifdef DEBUG      DPRINTF(("start bracket %d\n", number));
     printf("start bracket %d\n", number/2);  
     #endif  
2779    
2780      if (number > 0 && number < md->offset_end)      if (offset < md->offset_max)
2781        {        {
2782        save_offset1 = md->offset_vector[number];        int save_offset1 = md->offset_vector[offset];
2783        save_offset2 = md->offset_vector[number+1];        int save_offset2 = md->offset_vector[offset+1];
2784        md->offset_vector[number] = eptr - md->start_subject;        int save_offset3 = md->offset_vector[md->offset_end - number];
2785    
2786          DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
2787          md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
2788    
2789          do
2790            {
2791            if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
2792            ecode += (ecode[1] << 8) + ecode[2];
2793            }
2794          while (*ecode == OP_ALT);
2795    
2796        #ifdef DEBUG        DPRINTF(("bracket %d failed\n", number));
2797        printf("saving %d %d\n", save_offset1, save_offset2);  
2798        #endif        md->offset_vector[offset] = save_offset1;
2799          md->offset_vector[offset+1] = save_offset2;
2800          md->offset_vector[md->offset_end - number] = save_offset3;
2801          return FALSE;
2802        }        }
2803    
2804      /* Recurse for all the alternatives. */      /* Insufficient room for saving captured contents */
2805    
2806        else op = OP_BRA;
2807        }
2808    
2809      /* Other types of node can be handled by a switch */
2810    
2811      switch(op)
2812        {
2813        case OP_BRA:     /* Non-capturing bracket: optimized */
2814        DPRINTF(("start bracket 0\n"));
2815      do      do
2816        {        {
2817        if (match(eptr, ecode+3, offset_top, md)) return TRUE;        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
2818        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
2819        }        }
2820      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
2821        DPRINTF(("bracket 0 failed\n"));
2822        return FALSE;
2823    
2824      #ifdef DEBUG      /* Conditional group: compilation checked that there are no more than
2825      printf("bracket %d failed\n", number/2);      two branches. If the condition is false, skipping the first branch takes us
2826      #endif      past the end if there is only one branch, but that's OK because that is
2827        exactly what going to the ket would do. */
2828    
2829        case OP_COND:
2830        if (ecode[3] == OP_CREF)         /* Condition is extraction test */
2831          {
2832          int offset = ecode[4] << 1;    /* Doubled reference number */
2833          return match(eptr,
2834            ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)?
2835              5 : 3 + (ecode[1] << 8) + ecode[2]),
2836            offset_top, md, ims, FALSE, eptr);
2837          }
2838    
2839        /* The condition is an assertion. Call match() to evaluate it - setting
2840        the final argument TRUE causes it to stop at the end of an assertion. */
2841    
2842      if (number > 0 && number < md->offset_end)      else
2843        {        {
2844        md->offset_vector[number] = save_offset1;        if (match(eptr, ecode+3, offset_top, md, ims, TRUE, NULL))
2845        md->offset_vector[number+1] = save_offset2;          {
2846            ecode += 3 + (ecode[4] << 8) + ecode[5];
2847            while (*ecode == OP_ALT) ecode += (ecode[1] << 8) + ecode[2];
2848            }
2849          else ecode += (ecode[1] << 8) + ecode[2];
2850          return match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr);
2851        }        }
2852        /* Control never reaches here */
2853    
2854      return FALSE;      /* Skip over conditional reference data if encountered (should not be) */
     }  
2855    
2856    /* Other types of node can be handled by a switch */      case OP_CREF:
2857        ecode += 2;
2858        break;
2859    
2860        /* End of the pattern */
2861    
   switch(*ecode)  
     {  
2862      case OP_END:      case OP_END:
2863      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;          /* Record where we ended */
2864      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;   /* and how many extracts were taken */
2865      return TRUE;      return TRUE;
2866    
2867      /* The equivalent of Prolog's "cut" - if the rest doesn't match, the      /* Change option settings */
     whole thing doesn't match, so we have to get out via a longjmp(). */  
2868    
2869      case OP_CUT:      case OP_OPT:
2870      if (match(eptr, ecode+1, offset_top, md)) return TRUE;      ims = ecode[1];
2871      longjmp(md->fail_env, 1);      ecode += 2;
2872        DPRINTF(("ims set to %02x\n", ims));
2873        break;
2874    
2875      /* Assertion brackets. Check the alternative branches in turn - the      /* Assertion brackets. Check the alternative branches in turn - the
2876      matching won't pass the KET for an assertion. If any one branch matches,      matching won't pass the KET for an assertion. If any one branch matches,
2877      the assertion is true. */      the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
2878        start of each branch to move the current point backwards, so the code at
2879        this level is identical to the lookahead case. */
2880    
2881      case OP_ASSERT:      case OP_ASSERT:
2882        case OP_ASSERTBACK:
2883      do      do
2884        {        {
2885        if (match(eptr, ecode+3, offset_top, md)) break;        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, NULL)) break;
2886        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
2887        }        }
2888      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
2889      if (*ecode == OP_KET) return FALSE;      if (*ecode == OP_KET) return FALSE;
2890    
2891        /* If checking an assertion for a condition, return TRUE. */
2892    
2893        if (condassert) return TRUE;
2894    
2895      /* Continue from after the assertion, updating the offsets high water      /* Continue from after the assertion, updating the offsets high water
2896      mark, since extracts may have been taken during the assertion. */      mark, since extracts may have been taken during the assertion. */
2897    
# Line 2353  for (;;) Line 2903  for (;;)
2903      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match */
2904    
2905      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
2906        case OP_ASSERTBACK_NOT:
2907      do      do
2908        {        {
2909        if (match(eptr, ecode+3, offset_top, md)) return FALSE;        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, NULL)) return FALSE;
2910        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
2911        }        }
2912      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
2913    
2914        if (condassert) return TRUE;
2915      ecode += 3;      ecode += 3;
2916      continue;      continue;
2917    
2918        /* Move the subject pointer back. This occurs only at the start of
2919        each branch of a lookbehind assertion. If we are too close to the start to
2920        move back, this match function fails. */
2921    
2922        case OP_REVERSE:
2923        eptr -= (ecode[1] << 8) + ecode[2];
2924        if (eptr < md->start_subject) return FALSE;
2925        ecode += 3;
2926        break;
2927    
2928    
2929      /* "Once" brackets are like assertion brackets except that after a match,      /* "Once" brackets are like assertion brackets except that after a match,
2930      the point in the subject string is not moved back. Thus there can never be      the point in the subject string is not moved back. Thus there can never be
2931      a move back into the brackets. Check the alternative branches in turn - the      a move back into the brackets. Check the alternative branches in turn - the
2932      matching won't pass the KET for this kind of subpattern. If any one branch      matching won't pass the KET for this kind of subpattern. If any one branch
2933      matches, we carry on, leaving the subject pointer. */      matches, we carry on as at the end of a normal bracket, leaving the subject
2934        pointer. */
2935    
2936      case OP_ONCE:      case OP_ONCE:
     do  
2937        {        {
2938        if (match(eptr, ecode+3, offset_top, md)) break;        const uschar *prev = ecode;
       ecode += (ecode[1] << 8) + ecode[2];  
       }  
     while (*ecode == OP_ALT);  
     if (*ecode == OP_KET) return FALSE;  
2939    
2940      /* Continue as from after the assertion, updating the offsets high water        do
2941      mark, since extracts may have been taken. */          {
2942            if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) break;
2943            ecode += (ecode[1] << 8) + ecode[2];
2944            }
2945          while (*ecode == OP_ALT);
2946    
2947      do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);        /* If hit the end of the group (which could be repeated), fail */
2948      ecode += 3;  
2949      offset_top = md->end_offset_top;        if (*ecode != OP_ONCE && *ecode != OP_ALT) return FALSE;
2950      eptr = md->end_match_ptr;  
2951      continue;        /* Continue as from after the assertion, updating the offsets high water
2952          mark, since extracts may have been taken. */
2953    
2954          do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
2955    
2956          offset_top = md->end_offset_top;
2957          eptr = md->end_match_ptr;
2958    
2959          /* For a non-repeating ket, just continue at this level. This also
2960          happens for a repeating ket if no characters were matched in the group.
2961          This is the forcible breaking of infinite loops as implemented in Perl
2962          5.005. If there is an options reset, it will get obeyed in the normal
2963          course of events. */
2964    
2965          if (*ecode == OP_KET || eptr == eptrb)
2966            {
2967            ecode += 3;
2968            break;
2969            }
2970    
2971          /* The repeating kets try the rest of the pattern or restart from the
2972          preceding bracket, in the appropriate order. We need to reset any options
2973          that changed within the bracket before re-running it, so check the next
2974          opcode. */
2975    
2976          if (ecode[3] == OP_OPT)
2977            {
2978            ims = (ims & ~PCRE_IMS) | ecode[4];
2979            DPRINTF(("ims set to %02x at group repeat\n", ims));
2980            }
2981    
2982          if (*ecode == OP_KETRMIN)
2983            {
2984            if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr) ||
2985                match(eptr, prev, offset_top, md, ims, FALSE, eptr)) return TRUE;
2986            }
2987          else  /* OP_KETRMAX */
2988            {
2989            if (match(eptr, prev, offset_top, md, ims, FALSE, eptr) ||
2990                match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
2991            }
2992          }
2993        return FALSE;
2994    
2995      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
2996      bracketed group and go to there. */      bracketed group and go to there. */
# Line 2401  for (;;) Line 3007  for (;;)
3007    
3008      case OP_BRAZERO:      case OP_BRAZERO:
3009        {        {
3010        uschar *next = ecode+1;        const uschar *next = ecode+1;
3011        if (match(eptr, next, offset_top, md)) return TRUE;        if (match(eptr, next, offset_top, md, ims, FALSE, eptr)) return TRUE;
3012        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3013        ecode = next + 3;        ecode = next + 3;
3014        }        }
# Line 2410  for (;;) Line 3016  for (;;)
3016    
3017      case OP_BRAMINZERO:      case OP_BRAMINZERO:
3018        {        {
3019        uschar *next = ecode+1;        const uschar *next = ecode+1;
3020        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3021        if (match(eptr, next+3, offset_top, md)) return TRUE;        if (match(eptr, next+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3022        ecode++;        ecode++;
3023        }        }
3024      break;;      break;
3025    
3026      /* End of a group, repeated or non-repeating. If we are at the end of      /* End of a group, repeated or non-repeating. If we are at the end of
3027      an assertion "group", stop matching and return TRUE, but record the      an assertion "group", stop matching and return TRUE, but record the
3028      current high water mark for use by positive assertions. */      current high water mark for use by positive assertions. Do this also
3029        for the "once" (not backing up) groups. */
3030    
3031      case OP_KET:      case OP_KET:
3032      case OP_KETRMIN:      case OP_KETRMIN:
3033      case OP_KETRMAX:      case OP_KETRMAX:
3034        {        {
3035        int number;        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
       uschar *prev = ecode - (ecode[1] << 8) - ecode[2];  
3036    
3037        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
3038              *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
3039              *prev == OP_ONCE)
3040          {          {
3041          md->end_match_ptr = eptr;      /* For ONCE */          md->end_match_ptr = eptr;      /* For ONCE */
3042          md->end_offset_top = offset_top;          md->end_offset_top = offset_top;
3043          return TRUE;          return TRUE;
3044          }          }
3045    
3046        /* In all other cases we have to check the group number back at the        /* In all other cases except a conditional group we have to check the
3047        start and if necessary complete handling an extraction by setting the        group number back at the start and if necessary complete handling an
3048        final offset and bumping the high water mark. */        extraction by setting the offsets and bumping the high water mark. */
3049    
3050        number = (*prev - OP_BRA) << 1;        if (*prev != OP_COND)
3051            {
3052            int number = *prev - OP_BRA;
3053            int offset = number << 1;
3054    
3055        #ifdef DEBUG          DPRINTF(("end bracket %d\n", number));
       printf("end bracket %d\n", number/2);  
       #endif  
3056    
3057        if (number > 0)          if (number > 0)
         {  
         if (number >= md->offset_end) md->offset_overflow = TRUE; else  
3058            {            {
3059            md->offset_vector[number+1] = eptr - md->start_subject;            if (offset >= md->offset_max) md->offset_overflow = TRUE; else
3060            if (offset_top <= number) offset_top = number + 2;              {
3061                md->offset_vector[offset] =
3062                  md->offset_vector[md->offset_end - number];
3063                md->offset_vector[offset+1] = eptr - md->start_subject;
3064                if (offset_top <= offset) offset_top = offset + 2;
3065                }
3066            }            }
3067          }          }
3068    
3069        /* For a non-repeating ket, just advance to the next node and continue at        /* Reset the value of the ims flags, in case they got changed during
3070        this level. */        the group. */
3071    
3072          ims = original_ims;
3073          DPRINTF(("ims reset to %02x\n", ims));
3074    
3075        if (*ecode == OP_KET)        /* For a non-repeating ket, just continue at this level. This also
3076          happens for a repeating ket if no characters were matched in the group.
3077          This is the forcible breaking of infinite loops as implemented in Perl
3078          5.005. If there is an options reset, it will get obeyed in the normal
3079          course of events. */
3080    
3081          if (*ecode == OP_KET || eptr == eptrb)
3082          {          {
3083          ecode += 3;          ecode += 3;
3084          break;          break;
# Line 2468  for (;;) Line 3089  for (;;)
3089    
3090        if (*ecode == OP_KETRMIN)        if (*ecode == OP_KETRMIN)
3091          {          {
3092          if (match(eptr, ecode+3, offset_top, md) ||          if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr) ||
3093              match(eptr, prev, offset_top, md)) return TRUE;              match(eptr, prev, offset_top, md, ims, FALSE, eptr)) return TRUE;
3094          }          }
3095        else  /* OP_KETRMAX */        else  /* OP_KETRMAX */
3096          {          {
3097          if (match(eptr, prev, offset_top, md) ||          if (match(eptr, prev, offset_top, md, ims, FALSE, eptr) ||
3098              match(eptr, ecode+3, offset_top, md)) return TRUE;              match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3099          }          }
3100        }        }
3101      return FALSE;      return FALSE;
# Line 2483  for (;;) Line 3104  for (;;)
3104    
3105      case OP_CIRC:      case OP_CIRC:
3106      if (md->notbol && eptr == md->start_subject) return FALSE;      if (md->notbol && eptr == md->start_subject) return FALSE;
3107      if (md->multiline)      if ((ims & PCRE_MULTILINE) != 0)
3108        {        {
3109        if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;        if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;
3110        ecode++;        ecode++;
# Line 2498  for (;;) Line 3119  for (;;)
3119      ecode++;      ecode++;
3120      break;      break;
3121    
3122      /* Assert before internal newline if multiline, or before      /* Assert before internal newline if multiline, or before a terminating
3123      a terminating newline unless endonly is set, else end of subject unless      newline unless endonly is set, else end of subject unless noteol is set. */
     noteol is set. */  
3124    
3125      case OP_DOLL:      case OP_DOLL:
3126      if (md->noteol && eptr >= md->end_subject) return FALSE;      if ((ims & PCRE_MULTILINE) != 0)
     if (md->multiline)  
3127        {        {
3128        if (eptr < md->end_subject && *eptr != '\n') return FALSE;        if (eptr < md->end_subject) { if (*eptr != '\n') return FALSE; }
3129            else { if (md->noteol) return FALSE; }
3130        ecode++;        ecode++;
3131        break;        break;
3132        }        }
3133      else if (!md->endonly)      else
3134        {        {
3135        if (eptr < md->end_subject - 1 ||        if (md->noteol) return FALSE;
3136           (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;        if (!md->endonly)
3137        ecode++;          {
3138        break;          if (eptr < md->end_subject - 1 ||
3139               (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
3140    
3141            ecode++;
3142            break;
3143            }
3144        }        }
3145      /* ... else fall through */      /* ... else fall through */
3146    
3147      /* End of subject assertion */      /* End of subject assertion (\z) */
3148    
3149      case OP_EOD:      case OP_EOD:
3150      if (eptr < md->end_subject) return FALSE;      if (eptr < md->end_subject) return FALSE;
3151      ecode++;      ecode++;
3152      break;      break;
3153    
3154        /* End of subject or ending \n assertion (\Z) */
3155    
3156        case OP_EODN:
3157        if (eptr < md->end_subject - 1 ||
3158           (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
3159        ecode++;
3160        break;
3161    
3162      /* Word boundary assertions */      /* Word boundary assertions */
3163    
3164      case OP_NOT_WORD_BOUNDARY:      case OP_NOT_WORD_BOUNDARY:
3165      case OP_WORD_BOUNDARY:      case OP_WORD_BOUNDARY:
3166        {        {
3167        BOOL prev_is_word = (eptr != md->start_subject) &&        BOOL prev_is_word = (eptr != md->start_subject) &&
3168          ((pcre_ctypes[eptr[-1]] & ctype_word) != 0);          ((md->ctypes[eptr[-1]] & ctype_word) != 0);
3169        BOOL cur_is_word = (eptr < md->end_subject) &&        BOOL cur_is_word = (eptr < md->end_subject) &&
3170          ((pcre_ctypes[*eptr] & ctype_word) != 0);          ((md->ctypes[*eptr] & ctype_word) != 0);
3171        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
3172             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
3173          return FALSE;          return FALSE;
# Line 2544  for (;;) Line 3177  for (;;)
3177      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
3178    
3179      case OP_ANY:      case OP_ANY:
3180      if (!md->dotall && eptr < md->end_subject && *eptr == '\n') return FALSE;      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n')
3181          return FALSE;
3182      if (eptr++ >= md->end_subject) return FALSE;      if (eptr++ >= md->end_subject) return FALSE;
3183      ecode++;      ecode++;
3184      break;      break;
3185    
3186      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
3187      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) != 0)      if (eptr >= md->end_subject ||
3188           (md->ctypes[*eptr++] & ctype_digit) != 0)
3189        return FALSE;        return FALSE;
3190      ecode++;      ecode++;
3191      break;      break;
3192    
3193      case OP_DIGIT:      case OP_DIGIT:
3194      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) == 0)      if (eptr >= md->end_subject ||
3195           (md->ctypes[*eptr++] & ctype_digit) == 0)
3196        return FALSE;        return FALSE;
3197      ecode++;      ecode++;
3198      break;      break;
3199    
3200      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
3201      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) != 0)      if (eptr >= md->end_subject ||
3202           (md->ctypes[*eptr++] & ctype_space) != 0)
3203        return FALSE;        return FALSE;
3204      ecode++;      ecode++;
3205      break;      break;
3206    
3207      case OP_WHITESPACE:      case OP_WHITESPACE:
3208      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) == 0)      if (eptr >= md->end_subject ||
3209           (md->ctypes[*eptr++] & ctype_space) == 0)
3210        return FALSE;        return FALSE;
3211      ecode++;      ecode++;
3212      break;      break;
3213    
3214      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
3215      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) != 0)      if (eptr >= md->end_subject ||
3216           (md->ctypes[*eptr++] & ctype_word) != 0)
3217        return FALSE;        return FALSE;
3218      ecode++;      ecode++;
3219      break;      break;
3220    
3221      case OP_WORDCHAR:      case OP_WORDCHAR:
3222      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) == 0)      if (eptr >= md->end_subject ||
3223           (md->ctypes[*eptr++] & ctype_word) == 0)
3224        return FALSE;        return FALSE;
3225      ecode++;      ecode++;
3226      break;      break;
# Line 2596  for (;;) Line 3236  for (;;)
3236      case OP_REF:      case OP_REF:
3237        {        {
3238        int length;        int length;
3239        int number = ecode[1] << 1;                /* Doubled reference number */        int offset = ecode[1] << 1;                /* Doubled reference number */
3240        ecode += 2;                                /* Advance past the item */        ecode += 2;                                /* Advance past the item */
3241    
3242        if (number >= offset_top || md->offset_vector[number] < 0)        /* If the reference is unset, set the length to be longer than the amount
3243          {        of subject left; this ensures that every attempt at a match fails. We
3244          md->errorcode = PCRE_ERROR_BADREF;        can't just fail here, because of the possibility of quantifiers with zero
3245          return FALSE;        minima. */
3246          }  
3247          length = (offset >= offset_top || md->offset_vector[offset] < 0)?
3248            md->end_subject - eptr + 1 :
3249            md->offset_vector[offset+1] - md->offset_vector[offset];
3250    
3251        length = md->offset_vector[number+1] - md->offset_vector[number];        /* Set up for repetition, or handle the non-repeated case */
3252    
3253        switch (*ecode)        switch (*ecode)
3254          {          {
# Line 2632  for (;;) Line 3275  for (;;)
3275          break;          break;
3276    
3277          default:               /* No repeat follows */          default:               /* No repeat follows */
3278          if (!match_ref(number, eptr, length, md)) return FALSE;          if (!match_ref(offset, eptr, length, md, ims)) return FALSE;
3279          eptr += length;          eptr += length;
3280          continue;              /* With the main loop */          continue;              /* With the main loop */
3281          }          }
# Line 2648  for (;;) Line 3291  for (;;)
3291    
3292        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3293          {          {
3294          if (!match_ref(number, eptr, length, md)) return FALSE;          if (!match_ref(offset, eptr, length, md, ims)) return FALSE;
3295          eptr += length;          eptr += length;
3296          }          }
3297    
# Line 2663  for (;;) Line 3306  for (;;)
3306          {          {
3307          for (i = min;; i++)          for (i = min;; i++)
3308            {            {
3309            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3310            if (i >= max || !match_ref(number, eptr, length, md))              return TRUE;
3311              if (i >= max || !match_ref(offset, eptr, length, md, ims))
3312              return FALSE;              return FALSE;
3313            eptr += length;            eptr += length;
3314            }            }
# Line 2675  for (;;) Line 3319  for (;;)
3319    
3320        else        else
3321          {          {
3322          uschar *pp = eptr;          const uschar *pp = eptr;
3323          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3324            {            {
3325            if (!match_ref(number, eptr, length, md)) break;            if (!match_ref(offset, eptr, length, md, ims)) break;
3326            eptr += length;            eptr += length;
3327            }            }
3328          while (eptr >= pp)          while (eptr >= pp)
3329            {            {
3330            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3331                return TRUE;
3332            eptr -= length;            eptr -= length;
3333            }            }
3334          return FALSE;          return FALSE;
# Line 2691  for (;;) Line 3336  for (;;)
3336        }        }
3337      /* Control never gets here */      /* Control never gets here */
3338    
3339    
3340    
3341      /* Match a character class, possibly repeatedly. Look past the end of the      /* Match a character class, possibly repeatedly. Look past the end of the
3342      item to see if there is repeat information following. Then obey similar      item to see if there is repeat information following. Then obey similar
3343      code to character type repeats - written out again for speed. If caseless      code to character type repeats - written out again for speed. */
     matching was set at runtime but not at compile time, we have to check both  
     versions of a character. */  
3344    
3345      case OP_CLASS:      case OP_CLASS:
3346        {        {
3347        uschar *data = ecode + 1;  /* Save for matching */        const uschar *data = ecode + 1;  /* Save for matching */
3348        ecode += 33;               /* Advance past the item */        ecode += 33;                     /* Advance past the item */
3349    
3350        switch (*ecode)        switch (*ecode)
3351          {          {
# Line 2727  for (;;) Line 3372  for (;;)
3372          break;          break;
3373    
3374          default:               /* No repeat follows */          default:               /* No repeat follows */
3375          if (eptr >= md->end_subject) return FALSE;          min = max = 1;
3376          c = *eptr++;          break;
         if ((data[c/8] & (1 << (c&7))) != 0) continue;    /* With main loop */  
         if (md->runtime_caseless)  
           {  
           c = pcre_fcc[c];  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  /* With main loop */  
           }  
         return FALSE;  
3377          }          }
3378    
3379        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
# Line 2745  for (;;) Line 3383  for (;;)
3383          if (eptr >= md->end_subject) return FALSE;          if (eptr >= md->end_subject) return FALSE;
3384          c = *eptr++;          c = *eptr++;
3385          if ((data[c/8] & (1 << (c&7))) != 0) continue;          if ((data[c/8] & (1 << (c&7))) != 0) continue;
         if (md->runtime_caseless)  
           {  
           c = pcre_fcc[c];  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  
           }  
3386          return FALSE;          return FALSE;
3387          }          }
3388    
# Line 2765  for (;;) Line 3398  for (;;)
3398          {          {
3399          for (i = min;; i++)          for (i = min;; i++)
3400            {            {
3401            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3402                return TRUE;
3403            if (i >= max || eptr >= md->end_subject) return FALSE;            if (i >= max || eptr >= md->end_subject) return FALSE;
3404            c = *eptr++;            c = *eptr++;
3405            if ((data[c/8] & (1 << (c&7))) != 0) continue;            if ((data[c/8] & (1 << (c&7))) != 0) continue;
           if (md->runtime_caseless)  
             {  
             c = pcre_fcc[c];  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             }  
3406            return FALSE;            return FALSE;
3407            }            }
3408          /* Control never gets here */          /* Control never gets here */
# Line 2783  for (;;) Line 3412  for (;;)
3412    
3413        else        else
3414          {          {
3415          uschar *pp = eptr;          const uschar *pp = eptr;
3416          for (i = min; i < max; eptr++, i++)          for (i = min; i < max; eptr++, i++)
3417            {            {
3418            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3419            c = *eptr;            c = *eptr;
3420            if ((data[c/8] & (1 << (c&7))) != 0) continue;            if ((data[c/8] & (1 << (c&7))) != 0) continue;
           if (md->runtime_caseless)  
             {  
             c = pcre_fcc[c];  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             }  
3421            break;            break;
3422            }            }
3423    
3424          while (eptr >= pp)          while (eptr >= pp)
3425            if (match(eptr--, ecode, offset_top, md)) return TRUE;            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3426                return TRUE;
3427          return FALSE;          return FALSE;
3428          }          }
3429        }        }
# Line 2811  for (;;) Line 3436  for (;;)
3436        register int length = ecode[1];        register int length = ecode[1];
3437        ecode += 2;        ecode += 2;
3438    
3439        #ifdef DEBUG  #ifdef DEBUG    /* Sigh. Some compilers never learn. */
3440        if (eptr >= md->end_subject)        if (eptr >= md->end_subject)
3441          printf("matching subject <null> against pattern ");          printf("matching subject <null> against pattern ");
3442        else        else
# Line 2822  for (;;) Line 3447  for (;;)
3447          }          }
3448        pchars(ecode, length, FALSE, md);        pchars(ecode, length, FALSE, md);
3449        printf("\n");        printf("\n");
3450        #endif  #endif
3451    
3452        if (length > md->end_subject - eptr) return FALSE;        if (length > md->end_subject - eptr) return FALSE;
3453        if (md->caseless)        if ((ims & PCRE_CASELESS) != 0)
3454          {          {
3455          while (length-- > 0) if (pcre_lcc[*ecode++] != pcre_lcc[*eptr++]) return FALSE;          while (length-- > 0)
3456              if (md->lcc[*ecode++] != md->lcc[*eptr++])
3457                return FALSE;
3458          }          }
3459        else        else
3460          {          {
# Line 2879  for (;;) Line 3506  for (;;)
3506      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
3507      characters and work backwards. */      characters and work backwards. */
3508    
3509      #ifdef DEBUG      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
3510      printf("matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
3511    
3512      if (md->caseless)      if ((ims & PCRE_CASELESS) != 0)
3513        {        {
3514        c = pcre_lcc[c];        c = md->lcc[c];
3515        for (i = 1; i <= min; i++) if (c != pcre_lcc[*eptr++]) return FALSE;        for (i = 1; i <= min; i++)
3516            if (c != md->lcc[*eptr++]) return FALSE;
3517        if (min == max) continue;        if (min == max) continue;
3518        if (minimize)        if (minimize)
3519          {          {
3520          for (i = min;; i++)          for (i = min;; i++)
3521            {            {
3522            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3523            if (i >= max || eptr >= md->end_subject || c != pcre_lcc[*eptr++])              return TRUE;
3524              if (i >= max || eptr >= md->end_subject ||
3525                  c != md->lcc[*eptr++])
3526              return FALSE;              return FALSE;
3527            }            }
3528          /* Control never gets here */          /* Control never gets here */
3529          }          }
3530        else        else
3531          {          {
3532          uschar *pp = eptr;          const uschar *pp = eptr;
3533          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3534            {            {
3535            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c != md->lcc[*eptr]) break;
3536            eptr++;            eptr++;
3537            }            }
3538          while (eptr >= pp)          while (eptr >= pp)
3539            if (match(eptr--, ecode, offset_top, md)) return TRUE;            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3540                return TRUE;
3541          return FALSE;          return FALSE;
3542          }          }
3543        /* Control never gets here */        /* Control never gets here */
# Line 2924  for (;;) Line 3553  for (;;)
3553          {          {
3554          for (i = min;; i++)          for (i = min;; i++)
3555            {            {
3556            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3557                return TRUE;
3558            if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;            if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;
3559            }            }
3560          /* Control never gets here */          /* Control never gets here */
3561          }          }
3562        else        else
3563          {          {
3564          uschar *pp = eptr;          const uschar *pp = eptr;
3565          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3566            {            {
3567            if (eptr >= md->end_subject || c != *eptr) break;            if (eptr >= md->end_subject || c != *eptr) break;
3568            eptr++;            eptr++;
3569            }            }
3570          while (eptr >= pp)          while (eptr >= pp)
3571           if (match(eptr--, ecode, offset_top, md)) return TRUE;           if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3572               return TRUE;
3573          return FALSE;          return FALSE;
3574          }          }
3575        }        }
# Line 2947  for (;;) Line 3578  for (;;)
3578      /* Match a negated single character */      /* Match a negated single character */
3579    
3580      case OP_NOT:      case OP_NOT:
3581      if (eptr > md->end_subject) return FALSE;      if (eptr >= md->end_subject) return FALSE;
3582      ecode++;      ecode++;
3583      if (md->caseless)      if ((ims & PCRE_CASELESS) != 0)
3584        {        {
3585        if (pcre_lcc[*ecode++] == pcre_lcc[*eptr++]) return FALSE;        if (md->lcc[*ecode++] == md->lcc[*eptr++]) return FALSE;
3586        }        }
3587      else      else
3588        {        {
# Line 3006  for (;;) Line 3637  for (;;)
3637      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
3638      characters and work backwards. */      characters and work backwards. */
3639    
3640      #ifdef DEBUG      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
3641      printf("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
3642    
3643      if (md->caseless)      if ((ims & PCRE_CASELESS) != 0)
3644        {        {
3645        c = pcre_lcc[c];        c = md->lcc[c];
3646        for (i = 1; i <= min; i++) if (c == pcre_lcc[*eptr++]) return FALSE;        for (i = 1; i <= min; i++)
3647            if (c == md->lcc[*eptr++]) return FALSE;
3648        if (min == max) continue;        if (min == max) continue;
3649        if (minimize)        if (minimize)
3650          {          {
3651          for (i = min;; i++)          for (i = min;; i++)
3652            {            {
3653            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3654            if (i >= max || eptr >= md->end_subject || c == pcre_lcc[*eptr++])              return TRUE;
3655              if (i >= max || eptr >= md->end_subject ||
3656                  c == md->lcc[*eptr++])
3657              return FALSE;              return FALSE;
3658            }            }
3659          /* Control never gets here */          /* Control never gets here */
3660          }          }
3661        else        else
3662          {          {
3663          uschar *pp = eptr;          const uschar *pp = eptr;
3664          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3665            {            {
3666            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c == md->lcc[*eptr]) break;
3667            eptr++;            eptr++;
3668            }            }
3669          while (eptr >= pp)          while (eptr >= pp)
3670            if (match(eptr--, ecode, offset_top, md)) return TRUE;            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3671                return TRUE;
3672          return FALSE;          return FALSE;
3673          }          }
3674        /* Control never gets here */        /* Control never gets here */
# Line 3051  for (;;) Line 3684  for (;;)
3684          {          {
3685          for (i = min;; i++)          for (i = min;; i++)
3686            {            {
3687            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3688                return TRUE;
3689            if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;            if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;
3690            }            }
3691          /* Control never gets here */          /* Control never gets here */
3692          }          }
3693        else        else
3694          {          {
3695          uschar *pp = eptr;          const uschar *pp = eptr;
3696          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3697            {            {
3698            if (eptr >= md->end_subject || c == *eptr) break;            if (eptr >= md->end_subject || c == *eptr) break;
3699            eptr++;            eptr++;
3700            }            }
3701          while (eptr >= pp)          while (eptr >= pp)
3702           if (match(eptr--, ecode, offset_top, md)) return TRUE;           if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3703               return TRUE;
3704          return FALSE;          return FALSE;
3705          }          }
3706        }        }
# Line 3115  for (;;) Line 3750  for (;;)
3750      if (min > 0) switch(ctype)      if (min > 0) switch(ctype)
3751        {        {
3752        case OP_ANY:        case OP_ANY:
3753        if (!md->dotall)        if ((ims & PCRE_DOTALL) == 0)
3754          { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }          { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }
3755        else eptr += min;        else eptr += min;
3756        break;        break;
3757    
3758        case OP_NOT_DIGIT:        case OP_NOT_DIGIT:
3759        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3760          if ((pcre_ctypes[*eptr++] & ctype_digit) != 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_digit) != 0) return FALSE;
3761        break;        break;
3762    
3763        case OP_DIGIT:        case OP_DIGIT:
3764        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3765          if ((pcre_ctypes[*eptr++] & ctype_digit) == 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_digit) == 0) return FALSE;
3766        break;        break;
3767    
3768        case OP_NOT_WHITESPACE:        case OP_NOT_WHITESPACE:
3769        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3770          if ((pcre_ctypes[*eptr++] & ctype_space) != 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_space) != 0) return FALSE;
3771        break;        break;
3772    
3773        case OP_WHITESPACE:        case OP_WHITESPACE:
3774        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3775          if ((pcre_ctypes[*eptr++] & ctype_space) == 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_space) == 0) return FALSE;
3776        break;        break;
3777    
3778        case OP_NOT_WORDCHAR:        case OP_NOT_WORDCHAR:
3779        for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) != 0)        for (i = 1; i <= min; i++)
3780          return FALSE;          if ((md->ctypes[*eptr++] & ctype_word) != 0)
3781              return FALSE;
3782        break;        break;
3783    
3784        case OP_WORDCHAR:        case OP_WORDCHAR:
3785        for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) == 0)        for (i = 1; i <= min; i++)
3786          return FALSE;          if ((md->ctypes[*eptr++] & ctype_word) == 0)
3787              return FALSE;
3788        break;        break;
3789        }        }
3790    
# Line 3156  for (;;) Line 3793  for (;;)
3793      if (min == max) continue;      if (min == max) continue;
3794    
3795      /* If minimizing, we have to test the rest of the pattern before each      /* If minimizing, we have to test the rest of the pattern before each
3796      subsequent match, so inlining isn't much help; just use the function. */      subsequent match. */
3797    
3798      if (minimize)      if (minimize)
3799        {        {
3800        for (i = min;; i++)        for (i = min;; i++)
3801          {          {
3802          if (match(eptr, ecode, offset_top, md)) return TRUE;          if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE;
3803          if (i >= max || eptr >= md->end_subject ||          if (i >= max || eptr >= md->end_subject) return FALSE;
3804            !match_type(ctype, *eptr++, md->dotall))  
3805              return FALSE;          c = *eptr++;
3806            switch(ctype)
3807              {
3808              case OP_ANY:
3809              if ((ims & PCRE_DOTALL) == 0 && c == '\n') return FALSE;
3810              break;
3811    
3812              case OP_NOT_DIGIT:
3813              if ((md->ctypes[c] & ctype_digit) != 0) return FALSE;
3814              break;
3815    
3816              case OP_DIGIT:
3817              if ((md->ctypes[c] & ctype_digit) == 0) return FALSE;
3818              break;
3819    
3820              case OP_NOT_WHITESPACE:
3821              if ((md->ctypes[c] & ctype_space) != 0) return FALSE;
3822              break;
3823    
3824              case OP_WHITESPACE:
3825              if  ((md->ctypes[c] & ctype_space) == 0) return FALSE;
3826              break;
3827    
3828              case OP_NOT_WORDCHAR:
3829              if ((md->ctypes[c] & ctype_word) != 0) return FALSE;
3830              break;
3831    
3832              case OP_WORDCHAR:
3833              if ((md->ctypes[c] & ctype_word) == 0) return FALSE;
3834              break;
3835              }
3836          }          }
3837        /* Control never gets here */        /* Control never gets here */
3838        }        }
# Line 3175  for (;;) Line 3842  for (;;)
3842    
3843      else      else
3844        {        {
3845        uschar *pp = eptr;        const uschar *pp = eptr;
3846        switch(ctype)        switch(ctype)
3847          {          {
3848          case OP_ANY:          case OP_ANY:
3849          if (!md->dotall)          if ((ims & PCRE_DOTALL) == 0)
3850            {            {
3851            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3852              {              {
# Line 3198  for (;;) Line 3865  for (;;)
3865          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3866          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3867            {            {
3868            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
3869              break;              break;
3870            eptr++;            eptr++;
3871            }            }
# Line 3207  for (;;) Line 3874  for (;;)
3874          case OP_DIGIT:          case OP_DIGIT:
3875          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3876            {            {
3877            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
3878              break;              break;
3879            eptr++;            eptr++;
3880            }            }
# Line 3216  for (;;) Line 3883  for (;;)
3883          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3884          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3885            {            {
3886            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
3887              break;              break;
3888            eptr++;            eptr++;
3889            }            }
# Line 3225  for (;;) Line 3892  for (;;)
3892          case OP_WHITESPACE:          case OP_WHITESPACE:
3893          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3894            {            {
3895            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
3896              break;              break;
3897            eptr++;            eptr++;
3898            }            }
# Line 3234  for (;;) Line 3901  for (;;)
3901          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3902          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3903            {            {
3904            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
3905              break;              break;
3906            eptr++;            eptr++;
3907            }            }
# Line 3243  for (;;) Line 3910  for (;;)
3910          case OP_WORDCHAR:          case OP_WORDCHAR:
3911          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3912            {            {
3913            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
3914              break;              break;
3915            eptr++;            eptr++;
3916            }            }
# Line 3251  for (;;) Line 3918  for (;;)
3918          }          }
3919    
3920        while (eptr >= pp)        while (eptr >= pp)
3921          if (match(eptr--, ecode, offset_top, md)) return TRUE;          if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3922              return TRUE;
3923        return FALSE;        return FALSE;
3924        }        }
3925      /* Control never gets here */      /* Control never gets here */
# Line 3259  for (;;) Line 3927  for (;;)
3927      /* There's been some horrible disaster. */      /* There's been some horrible disaster. */
3928    
3929      default:      default:
3930      #ifdef DEBUG      DPRINTF(("Unknown opcode %d\n", *ecode));
     printf("Unknown opcode %d\n", *ecode);  
     #endif  
3931      md->errorcode = PCRE_ERROR_UNKNOWN_NODE;      md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
3932      return FALSE;      return FALSE;
3933      }      }
# Line 3276  for (;;) Line 3942  for (;;)
3942    
3943    
3944    
3945    
3946  /*************************************************  /*************************************************
3947  *         Execute a Regular Expression           *  *         Execute a Regular Expression           *
3948  *************************************************/  *************************************************/
# Line 3303  int Line 3970  int
3970  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
3971    const char *subject, int length, int options, int *offsets, int offsetcount)    const char *subject, int length, int options, int *offsets, int offsetcount)
3972  {  {
3973  int resetcount;  int resetcount, ocount;
 int ocount = offsetcount;  
3974  int first_char = -1;  int first_char = -1;
3975    int ims = 0;
3976  match_data match_block;  match_data match_block;
3977  uschar *start_bits = NULL;  const uschar *start_bits = NULL;
3978  uschar *start_match = (uschar *)subject;  const uschar *start_match = (const uschar *)subject;
3979  uschar *end_subject;  const uschar *end_subject;
3980  real_pcre *re = (real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
3981  real_pcre_extra *extra = (real_pcre_extra *)external_extra;  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
3982    BOOL using_temporary_offsets = FALSE;
3983  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
3984  BOOL startline = (re->options & PCRE_STARTLINE) != 0;  BOOL startline = (re->options & PCRE_STARTLINE) != 0;
3985    
# Line 3321  if (re == NULL || subject == NULL || Line 3989  if (re == NULL || subject == NULL ||
3989     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3990  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
3991    
3992  match_block.start_subject = (uschar *)subject;  match_block.start_subject = (const uschar *)subject;
3993  match_block.end_subject = match_block.start_subject + length;  match_block.end_subject = match_block.start_subject + length;
3994  end_subject = match_block.end_subject;  end_subject = match_block.end_subject;
3995    
3996  match_block.caseless  = ((re->options | options) & PCRE_CASELESS) != 0;  match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
 match_block.runtime_caseless = match_block.caseless &&  
   (re->options & PCRE_CASELESS) == 0;  
   
 match_block.multiline = ((re->options | options) & PCRE_MULTILINE) != 0;  
 match_block.dotall    = ((re->options | options) & PCRE_DOTALL) != 0;  
 match_block.endonly   = ((re->options | options) & PCRE_DOLLAR_ENDONLY) != 0;  
3997    
3998  match_block.notbol = (options & PCRE_NOTBOL) != 0;  match_block.notbol = (options & PCRE_NOTBOL) != 0;
3999  match_block.noteol = (options & PCRE_NOTEOL) != 0;  match_block.noteol = (options & PCRE_NOTEOL) != 0;
4000    
4001  match_block.errorcode = PCRE_ERROR_NOMATCH;     /* Default error */  match_block.errorcode = PCRE_ERROR_NOMATCH;     /* Default error */
4002    
4003    match_block.lcc = re->tables + lcc_offset;
4004    match_block.ctypes = re->tables + ctypes_offset;
4005    
4006    /* The ims options can vary during the matching as a result of the presence
4007    of (?ims) items in the pattern. They are kept in a local variable so that
4008    restoring at the exit of a group is easy. */
4009    
4010    ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
4011    
4012  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can
4013  hold, we get a temporary bit of working store to use during the matching.  hold, we get a temporary bit of working store to use during the matching.
4014  Otherwise, we can use the vector supplied, rounding down the size of it to a  Otherwise, we can use the vector supplied, rounding down its size to a multiple
4015  multiple of 2. */  of 3. */
4016    
4017    ocount = offsetcount - (offsetcount % 3);
4018    
4019  ocount &= (-2);  if (re->top_backref > 0 && re->top_backref >= ocount/3)
 if (re->top_backref > 0 && re->top_backref + 1 >= ocount/2)  
4020    {    {
4021    ocount = re->top_backref * 2 + 2;    ocount = re->top_backref * 3 + 3;
4022    match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
4023    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
4024    #ifdef DEBUG    using_temporary_offsets = TRUE;
4025    printf("Got memory to hold back references\n");    DPRINTF(("Got memory to hold back references\n"));
   #endif  
4026    }    }
4027  else match_block.offset_vector = offsets;  else match_block.offset_vector = offsets;
4028    
4029  match_block.offset_end = ocount;  match_block.offset_end = ocount;
4030    match_block.offset_max = (2*ocount)/3;
4031  match_block.offset_overflow = FALSE;  match_block.offset_overflow = FALSE;
4032    
4033  /* Compute the minimum number of offsets that we need to reset each time. Doing  /* Compute the minimum number of offsets that we need to reset each time. Doing
# Line 3365  in the pattern. */ Line 4037  in the pattern. */
4037  resetcount = 2 + re->top_bracket * 2;  resetcount = 2 + re->top_bracket * 2;
4038  if (resetcount > offsetcount) resetcount = ocount;  if (resetcount > offsetcount) resetcount = ocount;
4039    
4040  /* If MULTILINE is set at exec time but was not set at compile time, and the  /* Reset the working variable associated with each extraction. These should
4041  anchored flag is set, we must re-check because a setting provoked by ^ in the  never be used unless previously set, but they get saved and restored, and so we
4042  pattern is not right in multi-line mode. Calling is_anchored() again here does  initialize them to avoid reading uninitialized locations. */
 the right check, because multiline is now set. If it now yields FALSE, the  
 expression must have had ^ starting some of its branches. Check to see if  
 that is true for *all* branches, and if so, set the startline flag. */  
4043    
4044  if (match_block. multiline && anchored && (re->options & PCRE_MULTILINE) == 0 &&  if (match_block.offset_vector != NULL)
     !is_anchored(re->code, match_block.multiline))  
4045    {    {
4046    anchored = FALSE;    register int *iptr = match_block.offset_vector + ocount;
4047    if (is_startline(re->code)) startline = TRUE;    register int *iend = iptr - resetcount/2 + 1;
4048      while (--iptr >= iend) *iptr = -1;
4049    }    }
4050    
4051  /* Set up the first character to match, if available. The first_char value is  /* Set up the first character to match, if available. The first_char value is
4052  never set for an anchored regular expression, but the anchoring may be forced  never set for an anchored regular expression, but the anchoring may be forced
4053  at run time, so we have to test for anchoring. The first char may be unset for  at run time, so we have to test for anchoring. The first char may be unset for
4054  an unanchored pattern, of course. If there's no first char and the pattern was  an unanchored pattern, of course. If there's no first char and the pattern was
4055  studied, the may be a bitmap of possible first characters. However, we can  studied, there may be a bitmap of possible first characters. */
 use this only if the caseless state of the studying was correct. */  
4056    
4057  if (!anchored)  if (!anchored)
4058    {    {
4059    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->options & PCRE_FIRSTSET) != 0)
4060      {      {
4061      first_char = re->first_char;      first_char = re->first_char;
4062      if (match_block.caseless) first_char = pcre_lcc[first_char];      if ((ims & PCRE_CASELESS) != 0) first_char = match_block.lcc[first_char];
4063      }      }
4064    else    else
4065      if (!startline && extra != NULL &&      if (!startline && extra != NULL &&
4066        (extra->options & PCRE_STUDY_MAPPED) != 0 &&        (extra->options & PCRE_STUDY_MAPPED) != 0)
       ((extra->options & PCRE_STUDY_CASELESS) != 0) == match_block.caseless)  
4067          start_bits = extra->start_bits;          start_bits = extra->start_bits;
4068    }    }
4069    
# Line 3404  if (!anchored) Line 4071  if (!anchored)
4071    
4072  do  do
4073    {    {
4074      int rc;
4075    register int *iptr = match_block.offset_vector;    register int *iptr = match_block.offset_vector;
4076    register int *iend = iptr + resetcount;    register int *iend = iptr + resetcount;
4077    
# Line 3415  do Line 4083  do
4083    
4084    if (first_char >= 0)    if (first_char >= 0)
4085      {      {
4086      if (match_block.caseless)      if ((ims & PCRE_CASELESS) != 0)
4087        while (start_match < end_subject && pcre_lcc[*start_match] != first_char)        while (start_match < end_subject &&
4088                 match_block.lcc[*start_match] != first_char)
4089          start_match++;          start_match++;
4090      else      else
4091        while (start_match < end_subject && *start_match != first_char)        while (start_match < end_subject && *start_match != first_char)
# Line 3445  do Line 4114  do
4114        }        }
4115      }      }
4116    
4117    #ifdef DEBUG  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
4118    printf(">>>> Match against: ");    printf(">>>> Match against: ");
4119    pchars(start_match, end_subject - start_match, TRUE, &match_block);    pchars(start_match, end_subject - start_match, TRUE, &match_block);
4120    printf("\n");    printf("\n");
4121    #endif  #endif
4122    
4123    /* When a match occurs, substrings will be set for all internal extractions;    /* When a match occurs, substrings will be set for all internal extractions;
4124    we just need to set up the whole thing as substring 0 before returning. If    we just need to set up the whole thing as substring 0 before returning. If
4125    there were too many extractions, set the return code to zero. In the case    there were too many extractions, set the return code to zero. In the case
4126    where we had to get some local store to hold offsets for backreferences, copy    where we had to get some local store to hold offsets for backreferences, copy
4127    those back references that we can. In this case there need not be overflow    those back references that we can. In this case there need not be overflow
4128    if certain parts of the pattern were not used.    if certain parts of the pattern were not used. */
4129    
4130      if (!match(start_match, re->code, 2, &match_block, ims, FALSE, start_match))
4131        continue;
4132    
4133    Before starting the match, we have to set up a longjmp() target to enable    /* Copy the offset information from temporary store if necessary */
   the "cut" operation to fail a match completely without backtracking. */  
4134    
4135    if (setjmp(match_block.fail_env) == 0 &&    if (using_temporary_offsets)
       match(start_match, re->code, 2, &match_block))  
4136      {      {
4137      int rc;      if (offsetcount >= 4)
   
     if (ocount != offsetcount)  
4138        {        {
4139        if (offsetcount >= 4)        memcpy(offsets + 2, match_block.offset_vector + 2,
4140          {          (offsetcount - 2) * sizeof(int));
4141          memcpy(offsets + 2, match_block.offset_vector + 2,        DPRINTF(("Copied offsets from temporary memory\n"));
           (offsetcount - 2) * sizeof(int));  
         #ifdef DEBUG  
         printf("Copied offsets; freeing temporary memory\n");  
         #endif  
         }  
       if (match_block.end_offset_top > offsetcount)  
         match_block.offset_overflow = TRUE;  
   
       #ifdef DEBUG  
       printf("Freeing temporary memory\n");  
       #endif  
   
       (pcre_free)(match_block.offset_vector);  
4142        }        }
4143        if (match_block.end_offset_top > offsetcount)
4144          match_block.offset_overflow = TRUE;
4145    
4146      rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;      DPRINTF(("Freeing temporary memory\n"));
4147        (pcre_free)(match_block.offset_vector);
4148        }
4149    
4150      if (match_block.offset_end < 2) rc = 0; else    rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
       {  
       offsets[0] = start_match - match_block.start_subject;  
       offsets[1] = match_block.end_match_ptr - match_block.start_subject;  
       }  
4151    
4152      #ifdef DEBUG    if (match_block.offset_end < 2) rc = 0; else
4153      printf(">>>> returning %d\n", rc);      {
4154      #endif      offsets[0] = start_match - match_block.start_subject;
4155      return rc;      offsets[1] = match_block.end_match_ptr - match_block.start_subject;
4156      }      }
4157    
4158      DPRINTF((">>>> returning %d\n", rc));
4159      return rc;
4160    }    }
4161    
4162    /* This "while" is the end of the "do" above */
4163    
4164  while (!anchored &&  while (!anchored &&
4165         match_block.errorcode == PCRE_ERROR_NOMATCH &&         match_block.errorcode == PCRE_ERROR_NOMATCH &&
4166         start_match++ < end_subject);         start_match++ < end_subject);
4167    
4168  #ifdef DEBUG  if (using_temporary_offsets)
4169  printf(">>>> returning %d\n", match_block.errorcode);    {
4170  #endif    DPRINTF(("Freeing temporary memory\n"));
4171      (pcre_free)(match_block.offset_vector);
4172      }
4173    
4174    DPRINTF((">>>> returning %d\n", match_block.errorcode));
4175    
4176  return match_block.errorcode;  return match_block.errorcode;
4177  }  }

Legend:
Removed from v.5  
changed lines
  Added in v.29

  ViewVC Help
Powered by ViewVC 1.1.5