/[pcre]/code/trunk/pcre.c
ViewVC logotype

Diff of /code/trunk/pcre.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 13 by nigel, Sat Feb 24 21:38:21 2007 UTC revision 27 by nigel, Sat Feb 24 21:38:49 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1997 University of Cambridge             Copyright (c) 1997-1999 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 33  restrictions: Line 33  restrictions:
33    
34  /* #define DEBUG */  /* #define DEBUG */
35    
36  /* Use a macro for debugging printing, 'cause that eliminates the the use  /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
37  of #ifdef inline, and there are *still* stupid compilers about that don't like  inline, and there are *still* stupid compilers about that don't like indented
38  indented pre-processor statements. I suppose it's only been 10 years... */  pre-processor statements. I suppose it's only been 10 years... */
39    
40  #ifdef DEBUG  #ifdef DEBUG
41  #define DPRINTF(p) printf p  #define DPRINTF(p) printf p
# Line 49  the external pcre header. */ Line 49  the external pcre header. */
49  #include "internal.h"  #include "internal.h"
50    
51    
52    /* Allow compilation as C++ source code, should anybody want to do that. */
53    
54    #ifdef __cplusplus
55    #define class pcre_class
56    #endif
57    
58    
59    /* Number of items on the nested bracket stacks at compile time. This should
60    not be set greater than 200. */
61    
62    #define BRASTACK_SIZE 200
63    
64    
65  /* Min and max values for the common repeats; for the maxima, 0 => infinity */  /* Min and max values for the common repeats; for the maxima, 0 => infinity */
66    
67  static char rep_min[] = { 0, 0, 1, 1, 0, 0 };  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
68  static char rep_max[] = { 0, 0, 0, 0, 1, 1 };  static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
69    
70  /* Text forms of OP_ values and things, for debugging (not all used) */  /* Text forms of OP_ values and things, for debugging (not all used) */
71    
72  #ifdef DEBUG  #ifdef DEBUG
73  static const char *OP_names[] = {  static const char *OP_names[] = {
74    "End", "\\A", "\\B", "\\b", "\\D", "\\d",    "End", "\\A", "\\B", "\\b", "\\D", "\\d",
75    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
76    "not",    "Opt", "^", "$", "Any", "chars", "not",
77    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
78    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
79    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
80    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
81    "class", "negclass", "Ref",    "class", "Ref",
82    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
83      "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
84    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
85  };  };
86  #endif  #endif
# Line 76  are simple data values; negative values Line 90  are simple data values; negative values
90  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
91  is invalid. */  is invalid. */
92    
93  static short int escapes[] = {  static const short int escapes[] = {
94      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
95      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
96    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */
# Line 86  static short int escapes[] = { Line 100  static short int escapes[] = {
100    '`',      7, -ESC_b,      0, -ESC_d,     27,   '\f',      0,   /* ` - g */    '`',      7, -ESC_b,      0, -ESC_d,     27,   '\f',      0,   /* ` - g */
101      0,      0,      0,      0,      0,      0,   '\n',      0,   /* h - o */      0,      0,      0,      0,      0,      0,   '\n',      0,   /* h - o */
102      0,      0,   '\r', -ESC_s,   '\t',      0,      0, -ESC_w,   /* p - w */      0,      0,   '\r', -ESC_s,   '\t',      0,      0, -ESC_w,   /* p - w */
103      0,      0,      0                                            /* x - z */      0,      0, -ESC_z                                            /* x - z */
104  };  };
105    
106  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
107    
108  static BOOL  static BOOL
109    compile_regex(int, int *, uschar **, const uschar **, const char **);    compile_regex(int, int, int *, uschar **, const uschar **, const char **,
110        BOOL, int, compile_data *);
 /* Structure for passing "static" information around between the functions  
 doing the matching, so that they are thread-safe. */  
   
 typedef struct match_data {  
   int    errorcode;             /* As it says */  
   int   *offset_vector;         /* Offset vector */  
   int    offset_end;            /* One past the end */  
   BOOL   offset_overflow;       /* Set if too many extractions */  
   BOOL   caseless;              /* Case-independent flag */  
   BOOL   runtime_caseless;      /* Caseless forced at run time */  
   BOOL   multiline;             /* Multiline flag */  
   BOOL   notbol;                /* NOTBOL flag */  
   BOOL   noteol;                /* NOTEOL flag */  
   BOOL   dotall;                /* Dot matches any char */  
   BOOL   endonly;               /* Dollar not before final \n */  
   const uschar *start_subject;  /* Start of the subject string */  
   const uschar *end_subject;    /* End of the subject string */  
   jmp_buf fail_env;             /* Environment for longjump() break out */  
   const uschar *end_match_ptr;  /* Subject position at end match */  
   int     end_offset_top;       /* Highwater mark at end of match */  
 } match_data;  
111    
112    
113    
# Line 134  void  (*pcre_free)(void *) = free; Line 127  void  (*pcre_free)(void *) = free;
127    
128    
129  /*************************************************  /*************************************************
130    *             Default character tables           *
131    *************************************************/
132    
133    /* A default set of character tables is included in the PCRE binary. Its source
134    is built by the maketables auxiliary program, which uses the default C ctypes
135    functions, and put in the file chartables.c. These tables are used by PCRE
136    whenever the caller of pcre_compile() does not provide an alternate set of
137    tables. */
138    
139    #include "chartables.c"
140    
141    
142    
143    /*************************************************
144  *          Return version string                 *  *          Return version string                 *
145  *************************************************/  *************************************************/
146    
# Line 211  while (length-- > 0) Line 218  while (length-- > 0)
218    
219    
220  /*************************************************  /*************************************************
 *         Check subpattern for empty operand     *  
 *************************************************/  
   
 /* This function checks a bracketed subpattern to see if any of the paths  
 through it could match an empty string. This is used to diagnose an error if  
 such a subpattern is followed by a quantifier with an unlimited upper bound.  
   
 Argument:  
   code      points to the opening bracket  
   
 Returns:    TRUE or FALSE  
 */  
   
 static BOOL  
 could_be_empty(uschar *code)  
 {  
 do {  
   uschar *cc = code + 3;  
   
   /* Scan along the opcodes for this branch; as soon as we find something  
   that matches a non-empty string, break out and advance to test the next  
   branch. If we get to the end of the branch, return TRUE for the whole  
   sub-expression. */  
   
   for (;;)  
     {  
     /* Test an embedded subpattern; if it could not be empty, break the  
     loop. Otherwise carry on in the branch. */  
   
     if ((int)(*cc) >= OP_BRA || (int)(*cc) == OP_ONCE)  
       {  
       if (!could_be_empty(cc)) break;  
       do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);  
       cc += 3;  
       }  
   
     else switch (*cc)  
       {  
       /* Reached end of a branch: the subpattern may match the empty string */  
   
       case OP_ALT:  
       case OP_KET:  
       case OP_KETRMAX:  
       case OP_KETRMIN:  
       return TRUE;  
   
       /* Skip over assertive subpatterns */  
   
       case OP_ASSERT:  
       case OP_ASSERT_NOT:  
       do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);  
       cc += 3;  
       break;  
   
       /* Skip over things that don't match chars */  
   
       case OP_SOD:  
       case OP_EOD:  
       case OP_CIRC:  
       case OP_DOLL:  
       case OP_BRAZERO:  
       case OP_BRAMINZERO:  
       case OP_NOT_WORD_BOUNDARY:  
       case OP_WORD_BOUNDARY:  
       cc++;  
       break;  
   
       /* Skip over simple repeats with zero lower bound */  
   
       case OP_STAR:  
       case OP_MINSTAR:  
       case OP_QUERY:  
       case OP_MINQUERY:  
       case OP_NOTSTAR:  
       case OP_NOTMINSTAR:  
       case OP_NOTQUERY:  
       case OP_NOTMINQUERY:  
       case OP_TYPESTAR:  
       case OP_TYPEMINSTAR:  
       case OP_TYPEQUERY:  
       case OP_TYPEMINQUERY:  
       cc += 2;  
       break;  
   
       /* Skip over UPTOs (lower bound is zero) */  
   
       case OP_UPTO:  
       case OP_MINUPTO:  
       case OP_TYPEUPTO:  
       case OP_TYPEMINUPTO:  
       cc += 4;  
       break;  
   
       /* Check a class or a back reference for a zero minimum */  
   
       case OP_CLASS:  
       case OP_NEGCLASS:  
       case OP_REF:  
       cc += (*cc == OP_REF)? 2 : 33;  
   
       switch (*cc)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         cc++;  
         break;  
   
         case OP_CRRANGE:  
         case OP_CRMINRANGE:  
         if ((cc[1] << 8) + cc[2] != 0) goto NEXT_BRANCH;  
         cc += 3;  
         break;  
   
         default:  
         goto NEXT_BRANCH;  
         }  
       break;  
   
       /* Anything else matches at least one character */  
   
       default:  
       goto NEXT_BRANCH;  
       }  
     }  
   
   NEXT_BRANCH:  
   code += (code[1] << 8) + code[2];  
   }  
 while (*code == OP_ALT);  
   
 /* No branches match the empty string */  
   
 return FALSE;  
 }  
   
   
   
 /*************************************************  
221  *            Handle escapes                      *  *            Handle escapes                      *
222  *************************************************/  *************************************************/
223    
# Line 366  Arguments: Line 233  Arguments:
233    bracount   number of previous extracting brackets    bracount   number of previous extracting brackets
234    options    the options bits    options    the options bits
235    isclass    TRUE if inside a character class    isclass    TRUE if inside a character class
236      cd         pointer to char tables block
237    
238  Returns:     zero or positive => a data character  Returns:     zero or positive => a data character
239               negative => a special escape sequence               negative => a special escape sequence
# Line 374  Returns:     zero or positive => a data Line 242  Returns:     zero or positive => a data
242    
243  static int  static int
244  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
245    int options, BOOL isclass)    int options, BOOL isclass, compile_data *cd)
246  {  {
247  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
248  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */
# Line 417  else Line 285  else
285        {        {
286        oldptr = ptr;        oldptr = ptr;
287        c -= '0';        c -= '0';
288        while ((pcre_ctypes[ptr[1]] & ctype_digit) != 0)        while ((cd->ctypes[ptr[1]] & ctype_digit) != 0)
289          c = c * 10 + *(++ptr) - '0';          c = c * 10 + *(++ptr) - '0';
290        if (c < 10 || c <= bracount)        if (c < 10 || c <= bracount)
291          {          {
# Line 443  else Line 311  else
311    
312      case '0':      case '0':
313      c -= '0';      c -= '0';
314      while(i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_digit) != 0 &&      while(i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 &&
315        ptr[1] != '8' && ptr[1] != '9')        ptr[1] != '8' && ptr[1] != '9')
316          c = c * 8 + *(++ptr) - '0';          c = c * 8 + *(++ptr) - '0';
317      break;      break;
# Line 452  else Line 320  else
320    
321      case 'x':      case 'x':
322      c = 0;      c = 0;
323      while (i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_xdigit) != 0)      while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0)
324        {        {
325        ptr++;        ptr++;
326        c = c * 16 + pcre_lcc[*ptr] -        c = c * 16 + cd->lcc[*ptr] -
327          (((pcre_ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');          (((cd->ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');
328        }        }
329      break;      break;
330    
# Line 470  else Line 338  else
338    
339      /* A letter is upper-cased; then the 0x40 bit is flipped */      /* A letter is upper-cased; then the 0x40 bit is flipped */
340    
341      if (c >= 'a' && c <= 'z') c = pcre_fcc[c];      if (c >= 'a' && c <= 'z') c = cd->fcc[c];
342      c ^= 0x40;      c ^= 0x40;
343      break;      break;
344    
345      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
346      other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,      other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,
347      for Perl compatibility, it is a literal. */      for Perl compatibility, it is a literal. This code looks a bit odd, but
348        there used to be some cases other than the default, and there may be again
349        in future, so I haven't "optimized" it. */
350    
351      default:      default:
352      if ((options & PCRE_EXTRA) != 0) switch(c)      if ((options & PCRE_EXTRA) != 0) switch(c)
353        {        {
       case 'X':  
       c = -ESC_X;      /* This could be a lookup if it ever got into Perl */  
       break;  
   
354        default:        default:
355        *errorptr = ERR3;        *errorptr = ERR3;
356        break;        break;
# Line 510  where the ddds are digits. Line 376  where the ddds are digits.
376    
377  Arguments:  Arguments:
378    p         pointer to the first char after '{'    p         pointer to the first char after '{'
379      cd        pointer to char tables block
380    
381  Returns:    TRUE or FALSE  Returns:    TRUE or FALSE
382  */  */
383    
384  static BOOL  static BOOL
385  is_counted_repeat(const uschar *p)  is_counted_repeat(const uschar *p, compile_data *cd)
386  {  {
387  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
388  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
389  if (*p == '}') return TRUE;  if (*p == '}') return TRUE;
390    
391  if (*p++ != ',') return FALSE;  if (*p++ != ',') return FALSE;
392  if (*p == '}') return TRUE;  if (*p == '}') return TRUE;
393    
394  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
395  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
396  return (*p == '}');  return (*p == '}');
397  }  }
398    
# Line 545  Arguments: Line 412  Arguments:
412    maxp       pointer to int for max    maxp       pointer to int for max
413               returned as -1 if no max               returned as -1 if no max
414    errorptr   points to pointer to error message    errorptr   points to pointer to error message
415      cd         pointer to character tables block
416    
417  Returns:     pointer to '}' on success;  Returns:     pointer to '}' on success;
418               current ptr on error, with errorptr set               current ptr on error, with errorptr set
419  */  */
420    
421  static const uschar *  static const uschar *
422  read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr)  read_repeat_counts(const uschar *p, int *minp, int *maxp,
423      const char **errorptr, compile_data *cd)
424  {  {
425  int min = 0;  int min = 0;
426  int max = -1;  int max = -1;
427    
428  while ((pcre_ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';  while ((cd->ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';
429    
430  if (*p == '}') max = min; else  if (*p == '}') max = min; else
431    {    {
432    if (*(++p) != '}')    if (*(++p) != '}')
433      {      {
434      max = 0;      max = 0;
435      while((pcre_ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';      while((cd->ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';
436      if (max < min)      if (max < min)
437        {        {
438        *errorptr = ERR4;        *errorptr = ERR4;
# Line 588  return p; Line 457  return p;
457    
458    
459  /*************************************************  /*************************************************
460    *        Find the fixed length of a pattern      *
461    *************************************************/
462    
463    /* Scan a pattern and compute the fixed length of subject that will match it,
464    if the length is fixed. This is needed for dealing with backward assertions.
465    
466    Arguments:
467      code     points to the start of the pattern (the bracket)
468    
469    Returns:   the fixed length, or -1 if there is no fixed length
470    */
471    
472    static int
473    find_fixedlength(uschar *code)
474    {
475    int length = -1;
476    
477    register int branchlength = 0;
478    register uschar *cc = code + 3;
479    
480    /* Scan along the opcodes for this branch. If we get to the end of the
481    branch, check the length against that of the other branches. */
482    
483    for (;;)
484      {
485      int d;
486      register int op = *cc;
487      if (op >= OP_BRA) op = OP_BRA;
488    
489      switch (op)
490        {
491        case OP_BRA:
492        case OP_ONCE:
493        case OP_COND:
494        d = find_fixedlength(cc);
495        if (d < 0) return -1;
496        branchlength += d;
497        do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
498        cc += 3;
499        break;
500    
501        /* Reached end of a branch; if it's a ket it is the end of a nested
502        call. If it's ALT it is an alternation in a nested call. If it is
503        END it's the end of the outer call. All can be handled by the same code. */
504    
505        case OP_ALT:
506        case OP_KET:
507        case OP_KETRMAX:
508        case OP_KETRMIN:
509        case OP_END:
510        if (length < 0) length = branchlength;
511          else if (length != branchlength) return -1;
512        if (*cc != OP_ALT) return length;
513        cc += 3;
514        branchlength = 0;
515        break;
516    
517        /* Skip over assertive subpatterns */
518    
519        case OP_ASSERT:
520        case OP_ASSERT_NOT:
521        case OP_ASSERTBACK:
522        case OP_ASSERTBACK_NOT:
523        do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
524        cc += 3;
525        break;
526    
527        /* Skip over things that don't match chars */
528    
529        case OP_REVERSE:
530        cc++;
531    
532        case OP_CREF:
533        case OP_OPT:
534        cc++;
535        /* Fall through */
536    
537        case OP_SOD:
538        case OP_EOD:
539        case OP_EODN:
540        case OP_CIRC:
541        case OP_DOLL:
542        case OP_NOT_WORD_BOUNDARY:
543        case OP_WORD_BOUNDARY:
544        cc++;
545        break;
546    
547        /* Handle char strings */
548    
549        case OP_CHARS:
550        branchlength += *(++cc);
551        cc += *cc + 1;
552        break;
553    
554        /* Handle exact repetitions */
555    
556        case OP_EXACT:
557        case OP_TYPEEXACT:
558        branchlength += (cc[1] << 8) + cc[2];
559        cc += 4;
560        break;
561    
562        /* Handle single-char matchers */
563    
564        case OP_NOT_DIGIT:
565        case OP_DIGIT:
566        case OP_NOT_WHITESPACE:
567        case OP_WHITESPACE:
568        case OP_NOT_WORDCHAR:
569        case OP_WORDCHAR:
570        case OP_ANY:
571        branchlength++;
572        cc++;
573        break;
574    
575    
576        /* Check a class for variable quantification */
577    
578        case OP_CLASS:
579        cc += (*cc == OP_REF)? 2 : 33;
580    
581        switch (*cc)
582          {
583          case OP_CRSTAR:
584          case OP_CRMINSTAR:
585          case OP_CRQUERY:
586          case OP_CRMINQUERY:
587          return -1;
588    
589          case OP_CRRANGE:
590          case OP_CRMINRANGE:
591          if ((cc[1] << 8) + cc[2] != (cc[3] << 8) + cc[4]) return -1;
592          branchlength += (cc[1] << 8) + cc[2];
593          cc += 5;
594          break;
595    
596          default:
597          branchlength++;
598          }
599        break;
600    
601        /* Anything else is variable length */
602    
603        default:
604        return -1;
605        }
606      }
607    /* Control never gets here */
608    }
609    
610    
611    
612    
613    /*************************************************
614  *           Compile one branch                   *  *           Compile one branch                   *
615  *************************************************/  *************************************************/
616    
617  /* Scan the pattern, compiling it into the code vector.  /* Scan the pattern, compiling it into the code vector.
618    
619  Arguments:  Arguments:
620    options    the option bits    options      the option bits
621    bracket    points to number of brackets used    brackets     points to number of brackets used
622    code       points to the pointer to the current code point    code         points to the pointer to the current code point
623    ptrptr     points to the current pattern pointer    ptrptr       points to the current pattern pointer
624    errorptr   points to pointer to error message    errorptr     points to pointer to error message
625      optchanged   set to the value of the last OP_OPT item compiled
626      cd           contains pointers to tables
627    
628  Returns:     TRUE on success  Returns:       TRUE on success
629               FALSE, with *errorptr set on error                 FALSE, with *errorptr set on error
630  */  */
631    
632  static BOOL  static BOOL
633  compile_branch(int options, int *brackets, uschar **codeptr,  compile_branch(int options, int *brackets, uschar **codeptr,
634    const uschar **ptrptr, const char **errorptr)    const uschar **ptrptr, const char **errorptr, int *optchanged,
635      compile_data *cd)
636  {  {
637  int repeat_type, op_type;  int repeat_type, op_type;
638  int repeat_min, repeat_max;  int repeat_min, repeat_max;
639  int bravalue, length;  int bravalue, length;
640    int greedy_default, greedy_non_default;
641  register int c;  register int c;
642  register uschar *code = *codeptr;  register uschar *code = *codeptr;
643    uschar *tempcode;
644  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
645  const uschar *oldptr;  const uschar *tempptr;
646  uschar *previous = NULL;  uschar *previous = NULL;
647  uschar class[32];  uschar class[32];
648    
649    /* Set up the default and non-default settings for greediness */
650    
651    greedy_default = ((options & PCRE_UNGREEDY) != 0);
652    greedy_non_default = greedy_default ^ 1;
653    
654  /* Switch on next character until the end of the branch */  /* Switch on next character until the end of the branch */
655    
656  for (;; ptr++)  for (;; ptr++)
657    {    {
658    BOOL negate_class;    BOOL negate_class;
659    int  class_charcount;    int class_charcount;
660    int  class_lastchar;    int class_lastchar;
661      int newoptions;
662      int condref;
663    
664    c = *ptr;    c = *ptr;
665    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
666      {      {
667      if ((pcre_ctypes[c] & ctype_space) != 0) continue;      if ((cd->ctypes[c] & ctype_space) != 0) continue;
668      if (c == '#')      if (c == '#')
669        {        {
670        while ((c = *(++ptr)) != 0 && c != '\n');        while ((c = *(++ptr)) != 0 && c != '\n');
# Line 672  for (;; ptr++) Line 707  for (;; ptr++)
707    
708      case '[':      case '[':
709      previous = code;      previous = code;
710        *code++ = OP_CLASS;
711    
712      /* If the first character is '^', set the negation flag, and use a      /* If the first character is '^', set the negation flag and skip it. */
     different opcode. This only matters if caseless matching is specified at  
     runtime. */  
713    
714      if ((c = *(++ptr)) == '^')      if ((c = *(++ptr)) == '^')
715        {        {
716        negate_class = TRUE;        negate_class = TRUE;
       *code++ = OP_NEGCLASS;  
717        c = *(++ptr);        c = *(++ptr);
718        }        }
719      else      else negate_class = FALSE;
       {  
       negate_class = FALSE;  
       *code++ = OP_CLASS;  
       }  
720    
721      /* Keep a count of chars so that we can optimize the case of just a single      /* Keep a count of chars so that we can optimize the case of just a single
722      character. */      character. */
# Line 723  for (;; ptr++) Line 752  for (;; ptr++)
752    
753        if (c == '\\')        if (c == '\\')
754          {          {
755          c = check_escape(&ptr, errorptr, *brackets, options, TRUE);          c = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
756          if (-c == ESC_b) c = '\b';          if (-c == ESC_b) c = '\b';
757          else if (c < 0)          else if (c < 0)
758            {            {
759              register const uschar *cbits = cd->cbits;
760            class_charcount = 10;            class_charcount = 10;
761            switch (-c)            switch (-c)
762              {              {
763              case ESC_d:              case ESC_d:
764              for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_digit];              for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_digit];
765              continue;              continue;
766    
767              case ESC_D:              case ESC_D:
768              for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_digit];              for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_digit];
769              continue;              continue;
770    
771              case ESC_w:              case ESC_w:
772              for (c = 0; c < 32; c++)              for (c = 0; c < 32; c++)
773                class[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]);                class[c] |= (cbits[c+cbit_digit] | cbits[c+cbit_word]);
774              continue;              continue;
775    
776              case ESC_W:              case ESC_W:
777              for (c = 0; c < 32; c++)              for (c = 0; c < 32; c++)
778                class[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]);                class[c] |= ~(cbits[c+cbit_digit] | cbits[c+cbit_word]);
779              continue;              continue;
780    
781              case ESC_s:              case ESC_s:
782              for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_space];              for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_space];
783              continue;              continue;
784    
785              case ESC_S:              case ESC_S:
786              for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_space];              for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_space];
787              continue;              continue;
788    
789              default:              default:
# Line 785  for (;; ptr++) Line 815  for (;; ptr++)
815    
816          if (d == '\\')          if (d == '\\')
817            {            {
818            d = check_escape(&ptr, errorptr, *brackets, options, TRUE);            d = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
819            if (d < 0)            if (d < 0)
820              {              {
821              if (d == -ESC_b) d = '\b'; else              if (d == -ESC_b) d = '\b'; else
# Line 807  for (;; ptr++) Line 837  for (;; ptr++)
837            class[c/8] |= (1 << (c&7));            class[c/8] |= (1 << (c&7));
838            if ((options & PCRE_CASELESS) != 0)            if ((options & PCRE_CASELESS) != 0)
839              {              {
840              int uc = pcre_fcc[c];           /* flip case */              int uc = cd->fcc[c];           /* flip case */
841              class[uc/8] |= (1 << (uc&7));              class[uc/8] |= (1 << (uc&7));
842              }              }
843            class_charcount++;                /* in case a one-char range */            class_charcount++;                /* in case a one-char range */
# Line 822  for (;; ptr++) Line 852  for (;; ptr++)
852        class [c/8] |= (1 << (c&7));        class [c/8] |= (1 << (c&7));
853        if ((options & PCRE_CASELESS) != 0)        if ((options & PCRE_CASELESS) != 0)
854          {          {
855          c = pcre_fcc[c];   /* flip case */          c = cd->fcc[c];   /* flip case */
856          class[c/8] |= (1 << (c&7));          class[c/8] |= (1 << (c&7));
857          }          }
858        class_charcount++;        class_charcount++;
# Line 869  for (;; ptr++) Line 899  for (;; ptr++)
899      /* Various kinds of repeat */      /* Various kinds of repeat */
900    
901      case '{':      case '{':
902      if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;      if (!is_counted_repeat(ptr+1, cd)) goto NORMAL_CHAR;
903      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr);      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr, cd);
904      if (*errorptr != NULL) goto FAILED;      if (*errorptr != NULL) goto FAILED;
905      goto REPEAT;      goto REPEAT;
906    
# Line 895  for (;; ptr++) Line 925  for (;; ptr++)
925        goto FAILED;        goto FAILED;
926        }        }
927    
928      /* If the next character is '?' this is a minimizing repeat. Advance to the      /* If the next character is '?' this is a minimizing repeat, by default,
929        but if PCRE_UNGREEDY is set, it works the other way round. Advance to the
930      next character. */      next character. */
931    
932      if (ptr[1] == '?') { repeat_type = 1; ptr++; } else repeat_type = 0;      if (ptr[1] == '?')
933          { repeat_type = greedy_non_default; ptr++; }
934        else repeat_type = greedy_default;
935    
936      /* If the maximum is zero then the minimum must also be zero; Perl allows      /* If the maximum is zero then the minimum must also be zero; Perl allows
937      this case, so we do too - by simply omitting the item altogether. */      this case, so we do too - by simply omitting the item altogether. */
# Line 943  for (;; ptr++) Line 976  for (;; ptr++)
976      create a suitable repeat item. The code is shared with single-character      create a suitable repeat item. The code is shared with single-character
977      repeats by adding a suitable offset into repeat_type. */      repeats by adding a suitable offset into repeat_type. */
978    
979      else if ((int)*previous < OP_EOD || *previous == OP_ANY)      else if ((int)*previous < OP_EODN || *previous == OP_ANY)
980        {        {
981        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */
982        c = *previous;        c = *previous;
# Line 987  for (;; ptr++) Line 1020  for (;; ptr++)
1020          /* If the minimum is 1 and the previous item was a character string,          /* If the minimum is 1 and the previous item was a character string,
1021          we either have to put back the item that got cancelled if the string          we either have to put back the item that got cancelled if the string
1022          length was 1, or add the character back onto the end of a longer          length was 1, or add the character back onto the end of a longer
1023          string. For a character type nothing need be done; it will just get put          string. For a character type nothing need be done; it will just get
1024          back naturally. */          put back naturally. Note that the final character is always going to
1025            get added below. */
1026    
1027          else if (*previous == OP_CHARS)          else if (*previous == OP_CHARS)
1028            {            {
1029            if (code == previous) code += 2; else previous[1]++;            if (code == previous) code += 2; else previous[1]++;
1030            }            }
1031    
1032            /*  For a single negated character we also have to put back the
1033            item that got cancelled. */
1034    
1035            else if (*previous == OP_NOT) code++;
1036    
1037          /* If the maximum is unlimited, insert an OP_STAR. */          /* If the maximum is unlimited, insert an OP_STAR. */
1038    
1039          if (repeat_max < 0)          if (repeat_max < 0)
# Line 1023  for (;; ptr++) Line 1062  for (;; ptr++)
1062      /* If previous was a character class or a back reference, we put the repeat      /* If previous was a character class or a back reference, we put the repeat
1063      stuff after it. */      stuff after it. */
1064    
1065      else if (*previous == OP_CLASS || *previous == OP_NEGCLASS ||      else if (*previous == OP_CLASS || *previous == OP_REF)
              *previous == OP_REF)  
1066        {        {
1067        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
1068          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
# Line 1044  for (;; ptr++) Line 1082  for (;; ptr++)
1082        }        }
1083    
1084      /* If previous was a bracket group, we may have to replicate it in certain      /* If previous was a bracket group, we may have to replicate it in certain
1085      cases. If the maximum repeat count is unlimited, check that the bracket      cases. */
     group cannot match the empty string, and diagnose an error if it can. */  
1086    
1087      else if ((int)*previous >= OP_BRA)      else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE ||
1088                 (int)*previous == OP_COND)
1089        {        {
1090        int i;        int i, ketoffset = 0;
1091        int len = code - previous;        int len = code - previous;
1092    
1093        if (repeat_max == -1 && could_be_empty(previous))        /* If the maximum repeat count is unlimited, find the end of the bracket
1094          {        by scanning through from the start, and compute the offset back to it
1095          *errorptr = ERR10;        from the current code pointer. There may be an OP_OPT setting following
1096          goto FAILED;        the final KET, so we can't find the end just by going back from the code
1097          pointer. */
1098    
1099          if (repeat_max == -1)
1100            {
1101            register uschar *ket = previous;
1102            do ket += (ket[1] << 8) + ket[2]; while (*ket != OP_KET);
1103            ketoffset = code - ket;
1104          }          }
1105    
1106        /* If the minimum is greater than zero, and the maximum is unlimited or        /* If the minimum is greater than zero, and the maximum is unlimited or
# Line 1099  for (;; ptr++) Line 1144  for (;; ptr++)
1144            }            }
1145          }          }
1146    
1147        /* If the maximum is unlimited, set a repeater in the final copy. */        /* If the maximum is unlimited, set a repeater in the final copy. We
1148          can't just offset backwards from the current code point, because we
1149          don't know if there's been an options resetting after the ket. The
1150          correct offset was computed above. */
1151    
1152        if (repeat_max == -1) code[-3] = OP_KETRMAX + repeat_type;        if (repeat_max == -1) code[-ketoffset] = OP_KETRMAX + repeat_type;
1153        }        }
1154    
1155      /* Else there's some kind of shambles */      /* Else there's some kind of shambles */
# Line 1118  for (;; ptr++) Line 1166  for (;; ptr++)
1166      break;      break;
1167    
1168    
1169      /* Start of nested bracket sub-expression, or comment or lookahead.      /* Start of nested bracket sub-expression, or comment or lookahead or
1170      First deal with special things that can come after a bracket; all are      lookbehind or option setting or condition. First deal with special things
1171      introduced by ?, and the appearance of any of them means that this is not a      that can come after a bracket; all are introduced by ?, and the appearance
1172      referencing group. They were checked for validity in the first pass over      of any of them means that this is not a referencing group. They were
1173      the string, so we don't have to check for syntax errors here.  */      checked for validity in the first pass over the string, so we don't have to
1174        check for syntax errors here.  */
1175    
1176      case '(':      case '(':
1177      previous = code;              /* Only real brackets can be repeated */      newoptions = options;
1178        condref = -1;
1179    
1180      if (*(++ptr) == '?')      if (*(++ptr) == '?')
1181        {        {
1182        bravalue = OP_BRA;        int set, unset;
1183          int *optset;
1184    
1185        switch (*(++ptr))        switch (*(++ptr))
1186          {          {
1187          case '#':          case '#':                 /* Comment; skip to ket */
         case 'i':  
         case 'm':  
         case 's':  
         case 'x':  
1188          ptr++;          ptr++;
1189          while (*ptr != ')') ptr++;          while (*ptr != ')') ptr++;
         previous = NULL;  
1190          continue;          continue;
1191    
1192          case ':':                 /* Non-extracting bracket */          case ':':                 /* Non-extracting bracket */
1193            bravalue = OP_BRA;
1194          ptr++;          ptr++;
1195          break;          break;
1196    
1197          case '=':                 /* Assertions can't be repeated */          case '(':
1198            bravalue = OP_COND;       /* Conditional group */
1199            if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0)
1200              {
1201              condref = *ptr - '0';
1202              while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';
1203              ptr++;
1204              }
1205            else ptr--;
1206            break;
1207    
1208            case '=':                 /* Positive lookahead */
1209          bravalue = OP_ASSERT;          bravalue = OP_ASSERT;
1210          ptr++;          ptr++;
         previous = NULL;  
1211          break;          break;
1212    
1213          case '!':          case '!':                 /* Negative lookahead */
1214          bravalue = OP_ASSERT_NOT;          bravalue = OP_ASSERT_NOT;
1215          ptr++;          ptr++;
         previous = NULL;  
1216          break;          break;
1217    
1218          case '>':                         /* "Match once" brackets */          case '<':                 /* Lookbehinds */
1219          if ((options & PCRE_EXTRA) != 0)  /* Not yet standard */          switch (*(++ptr))
1220            {            {
1221            bravalue = OP_ONCE;            case '=':               /* Positive lookbehind */
1222              bravalue = OP_ASSERTBACK;
1223              ptr++;
1224              break;
1225    
1226              case '!':               /* Negative lookbehind */
1227              bravalue = OP_ASSERTBACK_NOT;
1228            ptr++;            ptr++;
           previous = NULL;  
1229            break;            break;
1230    
1231              default:                /* Syntax error */
1232              *errorptr = ERR24;
1233              goto FAILED;
1234            }            }
1235          /* Else fall through */          break;
1236    
1237          default:          case '>':                 /* One-time brackets */
1238          *errorptr = ERR12;          bravalue = OP_ONCE;
1239          goto FAILED;          ptr++;
1240            break;
1241    
1242            default:                  /* Option setting */
1243            set = unset = 0;
1244            optset = &set;
1245    
1246            while (*ptr != ')' && *ptr != ':')
1247              {
1248              switch (*ptr++)
1249                {
1250                case '-': optset = &unset; break;
1251    
1252                case 'i': *optset |= PCRE_CASELESS; break;
1253                case 'm': *optset |= PCRE_MULTILINE; break;
1254                case 's': *optset |= PCRE_DOTALL; break;
1255                case 'x': *optset |= PCRE_EXTENDED; break;
1256                case 'U': *optset |= PCRE_UNGREEDY; break;
1257                case 'X': *optset |= PCRE_EXTRA; break;
1258    
1259                default:
1260                *errorptr = ERR12;
1261                goto FAILED;
1262                }
1263              }
1264    
1265            /* Set up the changed option bits, but don't change anything yet. */
1266    
1267            newoptions = (options | set) & (~unset);
1268    
1269            /* If the options ended with ')' this is not the start of a nested
1270            group with option changes, so the options change at this level. At top
1271            level there is nothing else to be done (the options will in fact have
1272            been set from the start of compiling as a result of the first pass) but
1273            at an inner level we must compile code to change the ims options if
1274            necessary, and pass the new setting back so that it can be put at the
1275            start of any following branches, and when this group ends, a resetting
1276            item can be compiled. */
1277    
1278            if (*ptr == ')')
1279              {
1280              if ((options & PCRE_INGROUP) != 0 &&
1281                  (options & PCRE_IMS) != (newoptions & PCRE_IMS))
1282                {
1283                *code++ = OP_OPT;
1284                *code++ = *optchanged = newoptions & PCRE_IMS;
1285                }
1286              options = newoptions;  /* Change options at this level */
1287              previous = NULL;       /* This item can't be repeated */
1288              continue;              /* It is complete */
1289              }
1290    
1291            /* If the options ended with ':' we are heading into a nested group
1292            with possible change of options. Such groups are non-capturing and are
1293            not assertions of any kind. All we need to do is skip over the ':';
1294            the newoptions value is handled below. */
1295    
1296            bravalue = OP_BRA;
1297            ptr++;
1298          }          }
1299        }        }
1300    
1301      /* Else we have a referencing group */      /* Else we have a referencing group; adjust the opcode. */
1302    
1303      else      else
1304        {        {
# Line 1186  for (;; ptr++) Line 1310  for (;; ptr++)
1310        bravalue = OP_BRA + *brackets;        bravalue = OP_BRA + *brackets;
1311        }        }
1312    
1313      /* Process nested bracketed re; at end pointer is on the bracket. We copy      /* Process nested bracketed re. Assertions may not be repeated, but other
1314      code into a non-register variable in order to be able to pass its address      kinds can be. We copy code into a non-register variable in order to be able
1315      because some compilers complain otherwise. */      to pass its address because some compilers complain otherwise. Pass in a
1316        new setting for the ims options if they have changed. */
1317    
1318        previous = (bravalue >= OP_ONCE)? code : NULL;
1319      *code = bravalue;      *code = bravalue;
1320        tempcode = code;
1321    
1322        if (!compile_regex(
1323             options | PCRE_INGROUP,       /* Set for all nested groups */
1324             ((options & PCRE_IMS) != (newoptions & PCRE_IMS))?
1325               newoptions & PCRE_IMS : -1, /* Pass ims options if changed */
1326             brackets,                     /* Bracket level */
1327             &tempcode,                    /* Where to put code (updated) */
1328             &ptr,                         /* Input pointer (updated) */
1329             errorptr,                     /* Where to put an error message */
1330             (bravalue == OP_ASSERTBACK ||
1331              bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
1332             condref,                      /* Condition reference number */
1333             cd))                          /* Tables block */
1334          goto FAILED;
1335    
1336        /* At the end of compiling, code is still pointing to the start of the
1337        group, while tempcode has been updated to point past the end of the group
1338        and any option resetting that may follow it. The pattern pointer (ptr)
1339        is on the bracket. */
1340    
1341        /* If this is a conditional bracket, check that there are no more than
1342        two branches in the group. */
1343    
1344        if (bravalue == OP_COND)
1345        {        {
1346        uschar *mcode = code;        int branchcount = 0;
1347        if (!compile_regex(options, brackets, &mcode, &ptr, errorptr))        uschar *tc = code;
1348    
1349          do {
1350             branchcount++;
1351             tc += (tc[1] << 8) | tc[2];
1352             }
1353          while (*tc != OP_KET);
1354    
1355          if (branchcount > 2)
1356            {
1357            *errorptr = ERR27;
1358          goto FAILED;          goto FAILED;
1359        code = mcode;          }
1360        }        }
1361    
1362        /* Now update the main code pointer to the end of the group. */
1363    
1364        code = tempcode;
1365    
1366        /* Error if hit end of pattern */
1367    
1368      if (*ptr != ')')      if (*ptr != ')')
1369        {        {
1370        *errorptr = ERR14;        *errorptr = ERR14;
# Line 1210  for (;; ptr++) Line 1377  for (;; ptr++)
1377      for validity in the pre-compiling pass. */      for validity in the pre-compiling pass. */
1378    
1379      case '\\':      case '\\':
1380      oldptr = ptr;      tempptr = ptr;
1381      c = check_escape(&ptr, errorptr, *brackets, options, FALSE);      c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1382    
1383      /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values      /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values
1384      are arranged to be the negation of the corresponding OP_values. For the      are arranged to be the negation of the corresponding OP_values. For the
# Line 1224  for (;; ptr++) Line 1391  for (;; ptr++)
1391        {        {
1392        if (-c >= ESC_REF)        if (-c >= ESC_REF)
1393          {          {
         int refnum = -c - ESC_REF;  
         if (*brackets < refnum)  
           {  
           *errorptr = ERR15;  
           goto FAILED;  
           }  
1394          previous = code;          previous = code;
1395          *code++ = OP_REF;          *code++ = OP_REF;
1396          *code++ = refnum;          *code++ = -c - ESC_REF;
1397          }          }
1398        else        else
1399          {          {
1400          previous = (-c > ESC_b && -c < ESC_X)? code : NULL;          previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
1401          *code++ = -c;          *code++ = -c;
1402          }          }
1403        continue;        continue;
# Line 1244  for (;; ptr++) Line 1405  for (;; ptr++)
1405    
1406      /* Data character: reset and fall through */      /* Data character: reset and fall through */
1407    
1408      ptr = oldptr;      ptr = tempptr;
1409      c = '\\';      c = '\\';
1410    
1411      /* Handle a run of data characters until a metacharacter is encountered.      /* Handle a run of data characters until a metacharacter is encountered.
# Line 1262  for (;; ptr++) Line 1423  for (;; ptr++)
1423        {        {
1424        if ((options & PCRE_EXTENDED) != 0)        if ((options & PCRE_EXTENDED) != 0)
1425          {          {
1426          if ((pcre_ctypes[c] & ctype_space) != 0) continue;          if ((cd->ctypes[c] & ctype_space) != 0) continue;
1427          if (c == '#')          if (c == '#')
1428            {            {
1429            while ((c = *(++ptr)) != 0 && c != '\n');            while ((c = *(++ptr)) != 0 && c != '\n');
# Line 1277  for (;; ptr++) Line 1438  for (;; ptr++)
1438    
1439        if (c == '\\')        if (c == '\\')
1440          {          {
1441          oldptr = ptr;          tempptr = ptr;
1442          c = check_escape(&ptr, errorptr, *brackets, options, FALSE);          c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1443          if (c < 0) { ptr = oldptr; break; }          if (c < 0) { ptr = tempptr; break; }
1444          }          }
1445    
1446        /* Ordinary character or single-char escape */        /* Ordinary character or single-char escape */
# Line 1290  for (;; ptr++) Line 1451  for (;; ptr++)
1451    
1452      /* This "while" is the end of the "do" above. */      /* This "while" is the end of the "do" above. */
1453    
1454      while (length < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);      while (length < 255 && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0);
1455    
1456      /* Compute the length and set it in the data vector, and advance to      /* Compute the length and set it in the data vector, and advance to
1457      the next state. */      the next state. */
1458    
1459      previous[1] = length;      previous[1] = length;
1460      ptr--;      if (length < 255) ptr--;
1461      break;      break;
1462      }      }
1463    }                   /* end of big loop */    }                   /* end of big loop */
# Line 1320  return FALSE; Line 1481  return FALSE;
1481  /* On entry, ptr is pointing past the bracket character, but on return  /* On entry, ptr is pointing past the bracket character, but on return
1482  it points to the closing bracket, or vertical bar, or end of string.  it points to the closing bracket, or vertical bar, or end of string.
1483  The code variable is pointing at the byte into which the BRA operator has been  The code variable is pointing at the byte into which the BRA operator has been
1484  stored.  stored. If the ims options are changed at the start (for a (?ims: group) or
1485    during any branch, we need to insert an OP_OPT item at the start of every
1486    following branch to ensure they get set correctly at run time, and also pass
1487    the new options into every subsequent branch compile.
1488    
1489  Arguments:  Arguments:
1490    options   the option bits    options     the option bits
1491    brackets  -> int containing the number of extracting brackets used    optchanged  new ims options to set as if (?ims) were at the start, or -1
1492    codeptr   -> the address of the current code pointer                 for no change
1493    ptrptr    -> the address of the current pattern pointer    brackets    -> int containing the number of extracting brackets used
1494    errorptr  -> pointer to error message    codeptr     -> the address of the current code pointer
1495      ptrptr      -> the address of the current pattern pointer
1496      errorptr    -> pointer to error message
1497      lookbehind  TRUE if this is a lookbehind assertion
1498      condref     > 0 for OP_CREF setting at start of conditional group
1499      cd          points to the data block with tables pointers
1500    
1501  Returns:    TRUE on success  Returns:      TRUE on success
1502  */  */
1503    
1504  static BOOL  static BOOL
1505  compile_regex(int options, int *brackets, uschar **codeptr,  compile_regex(int options, int optchanged, int *brackets, uschar **codeptr,
1506    const uschar **ptrptr, const char **errorptr)    const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref,
1507      compile_data *cd)
1508  {  {
1509  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
1510  uschar *code = *codeptr;  uschar *code = *codeptr;
1511    uschar *last_branch = code;
1512  uschar *start_bracket = code;  uschar *start_bracket = code;
1513    uschar *reverse_count = NULL;
1514    int oldoptions = options & PCRE_IMS;
1515    
1516    code += 3;
1517    
1518    /* At the start of a reference-based conditional group, insert the reference
1519    number as an OP_CREF item. */
1520    
1521    if (condref > 0)
1522      {
1523      *code++ = OP_CREF;
1524      *code++ = condref;
1525      }
1526    
1527    /* Loop for each alternative branch */
1528    
1529  for (;;)  for (;;)
1530    {    {
1531    int length;    int length;
   uschar *last_branch = code;  
1532    
1533    code += 3;    /* Handle change of options */
1534    if (!compile_branch(options, brackets, &code, &ptr, errorptr))  
1535      if (optchanged >= 0)
1536        {
1537        *code++ = OP_OPT;
1538        *code++ = optchanged;
1539        options = (options & ~PCRE_IMS) | optchanged;
1540        }
1541    
1542      /* Set up dummy OP_REVERSE if lookbehind assertion */
1543    
1544      if (lookbehind)
1545        {
1546        *code++ = OP_REVERSE;
1547        reverse_count = code;
1548        *code++ = 0;
1549        *code++ = 0;
1550        }
1551    
1552      /* Now compile the branch */
1553    
1554      if (!compile_branch(options,brackets,&code,&ptr,errorptr,&optchanged,cd))
1555      {      {
1556      *ptrptr = ptr;      *ptrptr = ptr;
1557      return FALSE;      return FALSE;
# Line 1358  for (;;) Line 1563  for (;;)
1563    last_branch[1] = length >> 8;    last_branch[1] = length >> 8;
1564    last_branch[2] = length & 255;    last_branch[2] = length & 255;
1565    
1566      /* If lookbehind, check that this branch matches a fixed-length string,
1567      and put the length into the OP_REVERSE item. Temporarily mark the end of
1568      the branch with OP_END. */
1569    
1570      if (lookbehind)
1571        {
1572        *code = OP_END;
1573        length = find_fixedlength(last_branch);
1574        DPRINTF(("fixed length = %d\n", length));
1575        if (length < 0)
1576          {
1577          *errorptr = ERR25;
1578          *ptrptr = ptr;
1579          return FALSE;
1580          }
1581        reverse_count[0] = (length >> 8);
1582        reverse_count[1] = length & 255;
1583        }
1584    
1585    /* Reached end of expression, either ')' or end of pattern. Insert a    /* Reached end of expression, either ')' or end of pattern. Insert a
1586    terminating ket and the length of the whole bracketed item, and return,    terminating ket and the length of the whole bracketed item, and return,
1587    leaving the pointer at the terminating char. */    leaving the pointer at the terminating char. If any of the ims options
1588      were changed inside the group, compile a resetting op-code following. */
1589    
1590    if (*ptr != '|')    if (*ptr != '|')
1591      {      {
# Line 1368  for (;;) Line 1593  for (;;)
1593      *code++ = OP_KET;      *code++ = OP_KET;
1594      *code++ = length >> 8;      *code++ = length >> 8;
1595      *code++ = length & 255;      *code++ = length & 255;
1596        if (optchanged >= 0)
1597          {
1598          *code++ = OP_OPT;
1599          *code++ = oldoptions;
1600          }
1601      *codeptr = code;      *codeptr = code;
1602      *ptrptr = ptr;      *ptrptr = ptr;
1603      return TRUE;      return TRUE;
# Line 1376  for (;;) Line 1606  for (;;)
1606    /* Another branch follows; insert an "or" node and advance the pointer. */    /* Another branch follows; insert an "or" node and advance the pointer. */
1607    
1608    *code = OP_ALT;    *code = OP_ALT;
1609      last_branch = code;
1610      code += 3;
1611    ptr++;    ptr++;
1612    }    }
1613  /* Control never reaches here */  /* Control never reaches here */
# Line 1383  for (;;) Line 1615  for (;;)
1615    
1616    
1617    
1618    
1619    /*************************************************
1620    *      Find first significant op code            *
1621    *************************************************/
1622    
1623    /* This is called by several functions that scan a compiled expression looking
1624    for a fixed first character, or an anchoring op code etc. It skips over things
1625    that do not influence this. For one application, a change of caseless option is
1626    important.
1627    
1628    Arguments:
1629      code       pointer to the start of the group
1630      options    pointer to external options
1631      optbit     the option bit whose changing is significant, or
1632                 zero if none are
1633      optstop    TRUE to return on option change, otherwise change the options
1634                   value and continue
1635    
1636    Returns:     pointer to the first significant opcode
1637    */
1638    
1639    static const uschar*
1640    first_significant_code(const uschar *code, int *options, int optbit,
1641      BOOL optstop)
1642    {
1643    for (;;)
1644      {
1645      switch ((int)*code)
1646        {
1647        case OP_OPT:
1648        if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit))
1649          {
1650          if (optstop) return code;
1651          *options = (int)code[1];
1652          }
1653        code += 2;
1654        break;
1655    
1656        case OP_CREF:
1657        code += 2;
1658        break;
1659    
1660        case OP_ASSERT_NOT:
1661        case OP_ASSERTBACK:
1662        case OP_ASSERTBACK_NOT:
1663        do code += (code[1] << 8) + code[2]; while (*code == OP_ALT);
1664        code += 3;
1665        break;
1666    
1667        default:
1668        return code;
1669        }
1670      }
1671    /* Control never reaches here */
1672    }
1673    
1674    
1675    
1676    
1677  /*************************************************  /*************************************************
1678  *          Check for anchored expression         *  *          Check for anchored expression         *
1679  *************************************************/  *************************************************/
# Line 1397  A branch is also implicitly anchored if Line 1688  A branch is also implicitly anchored if
1688  the rest of the pattern at all possible matching points, so there is no point  the rest of the pattern at all possible matching points, so there is no point
1689  trying them again.  trying them again.
1690    
1691  Argument:  points to start of expression (the bracket)  Arguments:
1692  Returns:   TRUE or FALSE    code       points to start of expression (the bracket)
1693      options    points to the options setting
1694    
1695    Returns:     TRUE or FALSE
1696  */  */
1697    
1698  static BOOL  static BOOL
1699  is_anchored(register const uschar *code, BOOL multiline)  is_anchored(register const uschar *code, int *options)
1700  {  {
1701  do {  do {
1702     int op = (int)code[3];     const uschar *scode = first_significant_code(code + 3, options,
1703     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE)       PCRE_MULTILINE, FALSE);
1704       { if (!is_anchored(code+3, multiline)) return FALSE; }     register int op = *scode;
1705       if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
1706         { if (!is_anchored(scode, options)) return FALSE; }
1707     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)
1708       { if (code[4] != OP_ANY) return FALSE; }       { if (scode[1] != OP_ANY) return FALSE; }
1709     else if (op != OP_SOD && (multiline || op != OP_CIRC)) return FALSE;     else if (op != OP_SOD &&
1710               ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))
1711         return FALSE;
1712     code += (code[1] << 8) + code[2];     code += (code[1] << 8) + code[2];
1713     }     }
1714  while (*code == OP_ALT);  while (*code == OP_ALT);
# Line 1434  static BOOL Line 1732  static BOOL
1732  is_startline(const uschar *code)  is_startline(const uschar *code)
1733  {  {
1734  do {  do {
1735     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)     const uschar *scode = first_significant_code(code + 3, NULL, 0, FALSE);
1736       { if (!is_startline(code+3)) return FALSE; }     register int op = *scode;
1737     else if (code[3] != OP_CIRC) return FALSE;     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
1738         { if (!is_startline(scode)) return FALSE; }
1739       else if (op != OP_CIRC) return FALSE;
1740     code += (code[1] << 8) + code[2];     code += (code[1] << 8) + code[2];
1741     }     }
1742  while (*code == OP_ALT);  while (*code == OP_ALT);
# Line 1455  Consider each alternative branch. If the Line 1755  Consider each alternative branch. If the
1755  a bracket all of whose alternatives start with the same char (recurse ad lib),  a bracket all of whose alternatives start with the same char (recurse ad lib),
1756  then we return that char, otherwise -1.  then we return that char, otherwise -1.
1757    
1758  Argument:  points to start of expression (the bracket)  Arguments:
1759  Returns:   -1 or the fixed first char    code       points to start of expression (the bracket)
1760      options    pointer to the options (used to check casing changes)
1761    
1762    Returns:     -1 or the fixed first char
1763  */  */
1764    
1765  static int  static int
1766  find_firstchar(uschar *code)  find_firstchar(const uschar *code, int *options)
1767  {  {
1768  register int c = -1;  register int c = -1;
1769  do  do {
1770    {     int d;
1771    register int charoffset = 4;     const uschar *scode = first_significant_code(code + 3, options,
1772         PCRE_CASELESS, TRUE);
1773    if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)     register int op = *scode;
1774      {  
1775      register int d;     if (op >= OP_BRA) op = OP_BRA;
1776      if ((d = find_firstchar(code+3)) < 0) return -1;  
1777      if (c < 0) c = d; else if (c != d) return -1;     switch(op)
1778      }       {
1779         default:
1780    else switch(code[3])       return -1;
1781      {  
1782      default:       case OP_BRA:
1783      return -1;       case OP_ASSERT:
1784         case OP_ONCE:
1785      case OP_EXACT:       /* Fall through */       case OP_COND:
1786      charoffset++;       if ((d = find_firstchar(scode, options)) < 0) return -1;
1787         if (c < 0) c = d; else if (c != d) return -1;
1788      case OP_CHARS:       /* Fall through */       break;
1789      charoffset++;  
1790         case OP_EXACT:       /* Fall through */
1791         scode++;
1792    
1793         case OP_CHARS:       /* Fall through */
1794         scode++;
1795    
1796         case OP_PLUS:
1797         case OP_MINPLUS:
1798         if (c < 0) c = scode[1]; else if (c != scode[1]) return -1;
1799         break;
1800         }
1801    
1802      case OP_PLUS:     code += (code[1] << 8) + code[2];
1803      case OP_MINPLUS:     }
     if (c < 0) c = code[charoffset]; else if (c != code[charoffset]) return -1;  
     break;  
     }  
   code += (code[1] << 8) + code[2];  
   }  
1804  while (*code == OP_ALT);  while (*code == OP_ALT);
1805  return c;  return c;
1806  }  }
1807    
1808    
1809    
1810    
1811    
1812  /*************************************************  /*************************************************
1813  *        Compile a Regular Expression            *  *        Compile a Regular Expression            *
1814  *************************************************/  *************************************************/
# Line 1510  Arguments: Line 1821  Arguments:
1821    options      various option bits    options      various option bits
1822    errorptr     pointer to pointer to error text    errorptr     pointer to pointer to error text
1823    erroroffset  ptr offset in pattern where error was detected    erroroffset  ptr offset in pattern where error was detected
1824      tables       pointer to character tables or NULL
1825    
1826  Returns:       pointer to compiled data block, or NULL on error,  Returns:       pointer to compiled data block, or NULL on error,
1827                 with errorptr and erroroffset set                 with errorptr and erroroffset set
# Line 1517  Returns:       pointer to compiled data Line 1829  Returns:       pointer to compiled data
1829    
1830  pcre *  pcre *
1831  pcre_compile(const char *pattern, int options, const char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
1832    int *erroroffset)    int *erroroffset, const unsigned char *tables)
1833  {  {
1834  real_pcre *re;  real_pcre *re;
 int spaces = 0;  
1835  int length = 3;      /* For initial BRA plus length */  int length = 3;      /* For initial BRA plus length */
1836  int runlength;  int runlength;
1837  int c, size;  int c, size;
1838  int bracount = 0;  int bracount = 0;
 int brastack[200];  
1839  int top_backref = 0;  int top_backref = 0;
1840    int branch_extra = 0;
1841    int branch_newextra;
1842  unsigned int brastackptr = 0;  unsigned int brastackptr = 0;
1843  uschar *code;  uschar *code;
1844  const uschar *ptr;  const uschar *ptr;
1845    compile_data compile_block;
1846    int brastack[BRASTACK_SIZE];
1847    uschar bralenstack[BRASTACK_SIZE];
1848    
1849  #ifdef DEBUG  #ifdef DEBUG
1850  uschar *code_base, *code_end;  uschar *code_base, *code_end;
# Line 1556  if ((options & ~PUBLIC_OPTIONS) != 0) Line 1871  if ((options & ~PUBLIC_OPTIONS) != 0)
1871    return NULL;    return NULL;
1872    }    }
1873    
1874    /* Set up pointers to the individual character tables */
1875    
1876    if (tables == NULL) tables = pcre_default_tables;
1877    compile_block.lcc = tables + lcc_offset;
1878    compile_block.fcc = tables + fcc_offset;
1879    compile_block.cbits = tables + cbits_offset;
1880    compile_block.ctypes = tables + ctypes_offset;
1881    
1882    /* Reflect pattern for debugging output */
1883    
1884  DPRINTF(("------------------------------------------------------------------\n"));  DPRINTF(("------------------------------------------------------------------\n"));
1885  DPRINTF(("%s\n", pattern));  DPRINTF(("%s\n", pattern));
1886    
# Line 1572  while ((c = *(++ptr)) != 0) Line 1897  while ((c = *(++ptr)) != 0)
1897    int min, max;    int min, max;
1898    int class_charcount;    int class_charcount;
1899    
1900    if ((pcre_ctypes[c] & ctype_space) != 0)    if ((options & PCRE_EXTENDED) != 0)
     {  
     if ((options & PCRE_EXTENDED) != 0) continue;  
     spaces++;  
     }  
   
   if (c == '#' && (options & PCRE_EXTENDED) != 0)  
1901      {      {
1902      while ((c = *(++ptr)) != 0 && c != '\n');      if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
1903      continue;      if (c == '#')
1904          {
1905          while ((c = *(++ptr)) != 0 && c != '\n');
1906          continue;
1907          }
1908      }      }
1909    
1910    switch(c)    switch(c)
# Line 1594  while ((c = *(++ptr)) != 0) Line 1917  while ((c = *(++ptr)) != 0)
1917      case '\\':      case '\\':
1918        {        {
1919        const uschar *save_ptr = ptr;        const uschar *save_ptr = ptr;
1920        c = check_escape(&ptr, errorptr, bracount, options, FALSE);        c = check_escape(&ptr, errorptr, bracount, options, FALSE, &compile_block);
1921        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1922        if (c >= 0)        if (c >= 0)
1923          {          {
# Line 1614  while ((c = *(++ptr)) != 0) Line 1937  while ((c = *(++ptr)) != 0)
1937        int refnum = -c - ESC_REF;        int refnum = -c - ESC_REF;
1938        if (refnum > top_backref) top_backref = refnum;        if (refnum > top_backref) top_backref = refnum;
1939        length++;   /* For single back reference */        length++;   /* For single back reference */
1940        if (ptr[1] == '{' && is_counted_repeat(ptr+2))        if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
1941          {          {
1942          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
1943          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1944          if ((min == 0 && (max == 1 || max == -1)) ||          if ((min == 0 && (max == 1 || max == -1)) ||
1945            (min == 1 && max == -1))            (min == 1 && max == -1))
# Line 1640  while ((c = *(++ptr)) != 0) Line 1963  while ((c = *(++ptr)) != 0)
1963      or back reference. */      or back reference. */
1964    
1965      case '{':      case '{':
1966      if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;      if (!is_counted_repeat(ptr+1, &compile_block)) goto NORMAL_CHAR;
1967      ptr = read_repeat_counts(ptr+1, &min, &max, errorptr);      ptr = read_repeat_counts(ptr+1, &min, &max, errorptr, &compile_block);
1968      if (*errorptr != NULL) goto PCRE_ERROR_RETURN;      if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1969      if ((min == 0 && (max == 1 || max == -1)) ||      if ((min == 0 && (max == 1 || max == -1)) ||
1970        (min == 1 && max == -1))        (min == 1 && max == -1))
# Line 1655  while ((c = *(++ptr)) != 0) Line 1978  while ((c = *(++ptr)) != 0)
1978      if (ptr[1] == '?') ptr++;      if (ptr[1] == '?') ptr++;
1979      continue;      continue;
1980    
1981      /* An alternation contains an offset to the next branch or ket. */      /* An alternation contains an offset to the next branch or ket. If any ims
1982        options changed in the previous branch(es), and/or if we are in a
1983        lookbehind assertion, extra space will be needed at the start of the
1984        branch. This is handled by branch_extra. */
1985    
1986      case '|':      case '|':
1987      length += 3;      length += 3 + branch_extra;
1988      continue;      continue;
1989    
1990      /* A character class uses 33 characters. Don't worry about character types      /* A character class uses 33 characters. Don't worry about character types
# Line 1672  while ((c = *(++ptr)) != 0) Line 1999  while ((c = *(++ptr)) != 0)
1999        {        {
2000        if (*ptr == '\\')        if (*ptr == '\\')
2001          {          {
2002          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE);          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE,
2003              &compile_block);
2004          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2005          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
2006          }          }
# Line 1689  while ((c = *(++ptr)) != 0) Line 2017  while ((c = *(++ptr)) != 0)
2017    
2018        /* A repeat needs either 1 or 5 bytes. */        /* A repeat needs either 1 or 5 bytes. */
2019    
2020        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2021          {          {
2022          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
2023          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2024          if ((min == 0 && (max == 1 || max == -1)) ||          if ((min == 0 && (max == 1 || max == -1)) ||
2025            (min == 1 && max == -1))            (min == 1 && max == -1))
# Line 1705  while ((c = *(++ptr)) != 0) Line 2033  while ((c = *(++ptr)) != 0)
2033      /* Brackets may be genuine groups or special things */      /* Brackets may be genuine groups or special things */
2034    
2035      case '(':      case '(':
2036        branch_newextra = 0;
2037    
2038      /* Handle special forms of bracket, which all start (? */      /* Handle special forms of bracket, which all start (? */
2039    
2040      if (ptr[1] == '?') switch (c = ptr[2])      if (ptr[1] == '?')
2041        {        {
2042        /* Skip over comments entirely */        int set, unset;
2043        case '#':        int *optset;
2044        ptr += 3;  
2045        while (*ptr != 0 && *ptr != ')') ptr++;        switch (c = ptr[2])
       if (*ptr == 0)  
2046          {          {
2047          *errorptr = ERR18;          /* Skip over comments entirely */
2048            case '#':
2049            ptr += 3;
2050            while (*ptr != 0 && *ptr != ')') ptr++;
2051            if (*ptr == 0)
2052              {
2053              *errorptr = ERR18;
2054              goto PCRE_ERROR_RETURN;
2055              }
2056            continue;
2057    
2058            /* Non-referencing groups and lookaheads just move the pointer on, and
2059            then behave like a non-special bracket, except that they don't increment
2060            the count of extracting brackets. Ditto for the "once only" bracket,
2061            which is in Perl from version 5.005. */
2062    
2063            case ':':
2064            case '=':
2065            case '!':
2066            case '>':
2067            ptr += 2;
2068            break;
2069    
2070            /* Lookbehinds are in Perl from version 5.005 */
2071    
2072            case '<':
2073            if (ptr[3] == '=' || ptr[3] == '!')
2074              {
2075              ptr += 3;
2076              branch_newextra = 3;
2077              length += 3;         /* For the first branch */
2078              break;
2079              }
2080            *errorptr = ERR24;
2081          goto PCRE_ERROR_RETURN;          goto PCRE_ERROR_RETURN;
         }  
       continue;  
2082    
2083        /* Non-referencing groups and lookaheads just move the pointer on, and          /* Conditionals are in Perl from version 5.005. The bracket must either
2084        then behave like a non-special bracket, except that they don't increment          be followed by a number (for bracket reference) or by an assertion
2085        the count of extracting brackets. */          group. */
2086    
2087        case ':':          case '(':
2088        case '=':          if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0)
2089        case '!':            {
2090        ptr += 2;            ptr += 4;
2091        break;            length += 2;
2092              while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++;
2093              if (*ptr != ')')
2094                {
2095                *errorptr = ERR26;
2096                goto PCRE_ERROR_RETURN;
2097                }
2098              }
2099            else   /* An assertion must follow */
2100              {
2101              ptr++;   /* Can treat like ':' as far as spacing is concerned */
2102    
2103              if (ptr[2] != '?' || strchr("=!<", ptr[3]) == NULL)
2104                {
2105                ptr += 2;    /* To get right offset in message */
2106                *errorptr = ERR28;
2107                goto PCRE_ERROR_RETURN;
2108                }
2109              }
2110            break;
2111    
2112            /* Else loop checking valid options until ) is met. Anything else is an
2113            error. If we are without any brackets, i.e. at top level, the settings
2114            act as if specified in the options, so massage the options immediately.
2115            This is for backward compatibility with Perl 5.004. */
2116    
2117            default:
2118            set = unset = 0;
2119            optset = &set;
2120            ptr += 2;
2121    
2122            for (;; ptr++)
2123              {
2124              c = *ptr;
2125              switch (c)
2126                {
2127                case 'i':
2128                *optset |= PCRE_CASELESS;
2129                continue;
2130    
2131                case 'm':
2132                *optset |= PCRE_MULTILINE;
2133                continue;
2134    
2135                case 's':
2136                *optset |= PCRE_DOTALL;
2137                continue;
2138    
2139                case 'x':
2140                *optset |= PCRE_EXTENDED;
2141                continue;
2142    
2143                case 'X':
2144                *optset |= PCRE_EXTRA;
2145                continue;
2146    
2147                case 'U':
2148                *optset |= PCRE_UNGREEDY;
2149                continue;
2150    
2151                case '-':
2152                optset = &unset;
2153                continue;
2154    
2155                /* A termination by ')' indicates an options-setting-only item;
2156                this is global at top level; otherwise nothing is done here and
2157                it is handled during the compiling process on a per-bracket-group
2158                basis. */
2159    
2160                case ')':
2161                if (brastackptr == 0)
2162                  {
2163                  options = (options | set) & (~unset);
2164                  set = unset = 0;     /* To save length */
2165                  }
2166                /* Fall through */
2167    
2168                /* A termination by ':' indicates the start of a nested group with
2169                the given options set. This is again handled at compile time, but
2170                we must allow for compiled space if any of the ims options are
2171                set. We also have to allow for resetting space at the end of
2172                the group, which is why 4 is added to the length and not just 2.
2173                If there are several changes of options within the same group, this
2174                will lead to an over-estimate on the length, but this shouldn't
2175                matter very much. We also have to allow for resetting options at
2176                the start of any alternations, which we do by setting
2177                branch_newextra to 2. */
2178    
2179                case ':':
2180                if (((set|unset) & PCRE_IMS) != 0)
2181                  {
2182                  length += 4;
2183                  branch_newextra = 2;
2184                  }
2185                goto END_OPTIONS;
2186    
2187        /* Ditto for the "once only" bracket, allowed only if the extra bit              /* Unrecognized option character */
       is set. */  
2188    
2189        case '>':              default:
2190        if ((options & PCRE_EXTRA) != 0)              *errorptr = ERR12;
2191          {              goto PCRE_ERROR_RETURN;
2192          ptr += 2;              }
2193          break;            }
         }  
       /* Else fall through */
2194    
2195        /* Else loop setting valid options until ) is met. Anything else is an          /* If we hit a closing bracket, that's it - this is a freestanding
2196        error. */          option-setting. We need to ensure that branch_extra is updated if
2197            necessary. The only values branch_newextra can have here are 0 or 2.
2198            If the value is 2, then branch_extra must either be 2 or 5, depending
2199            on whether this is a lookbehind group or not. */
2200    
2201        default:          END_OPTIONS:
2202        ptr += 2;          if (c == ')')
       for (;; ptr++)  
         {  
         if ((c = *ptr) == 'i')  
           {  
           options |= PCRE_CASELESS;  
           continue;  
           }  
         else if ((c = *ptr) == 'm')  
           {  
           options |= PCRE_MULTILINE;  
           continue;  
           }  
         else if (c == 's')  
2203            {            {
2204            options |= PCRE_DOTALL;            if (branch_newextra == 2 && (branch_extra == 0 || branch_extra == 3))
2205                branch_extra += branch_newextra;
2206            continue;            continue;
2207            }            }
         else if (c == 'x')  
           {  
           options |= PCRE_EXTENDED;  
           length -= spaces;          /* Already counted spaces */  
           continue;  
           }  
         else if (c == ')') break;  
2208    
2209          *errorptr = ERR12;          /* If options were terminated by ':' control comes here. Fall through
2210          goto PCRE_ERROR_RETURN;          to handle the group below. */
2211          }          }
       continue;                      /* End of this bracket handling */  
2212        }        }
2213    
2214      /* Extracting brackets must be counted so we can process escapes in a      /* Extracting brackets must be counted so we can process escapes in a
# Line 1784  while ((c = *(++ptr)) != 0) Line 2217  while ((c = *(++ptr)) != 0)
2217      else bracount++;      else bracount++;
2218    
2219      /* Non-special forms of bracket. Save length for computing whole length      /* Non-special forms of bracket. Save length for computing whole length
2220      at end if there's a repeat that requires duplication of the group. */      at end if there's a repeat that requires duplication of the group. Also
2221        save the current value of branch_extra, and start the new group with
2222        the new value. If non-zero, this will either be 2 for a (?imsx: group, or 3
2223        for a lookbehind assertion. */
2224    
2225      if (brastackptr >= sizeof(brastack)/sizeof(int))      if (brastackptr >= sizeof(brastack)/sizeof(int))
2226        {        {
# Line 1792  while ((c = *(++ptr)) != 0) Line 2228  while ((c = *(++ptr)) != 0)
2228        goto PCRE_ERROR_RETURN;        goto PCRE_ERROR_RETURN;
2229        }        }
2230    
2231        bralenstack[brastackptr] = branch_extra;
2232        branch_extra = branch_newextra;
2233    
2234      brastack[brastackptr++] = length;      brastack[brastackptr++] = length;
2235      length += 3;      length += 3;
2236      continue;      continue;
# Line 1799  while ((c = *(++ptr)) != 0) Line 2238  while ((c = *(++ptr)) != 0)
2238      /* Handle ket. Look for subsequent max/min; for certain sets of values we      /* Handle ket. Look for subsequent max/min; for certain sets of values we
2239      have to replicate this bracket up to that many times. If brastackptr is      have to replicate this bracket up to that many times. If brastackptr is
2240      0 this is an unmatched bracket which will generate an error, but take care      0 this is an unmatched bracket which will generate an error, but take care
2241      not to try to access brastack[-1]. */      not to try to access brastack[-1] when computing the length and restoring
2242        the branch_extra value. */
2243    
2244      case ')':      case ')':
2245      length += 3;      length += 3;
2246        {        {
2247        int minval = 1;        int minval = 1;
2248        int maxval = 1;        int maxval = 1;
2249        int duplength = (brastackptr > 0)? length - brastack[--brastackptr] : 0;        int duplength;
2250    
2251          if (brastackptr > 0)
2252            {
2253            duplength = length - brastack[--brastackptr];
2254            branch_extra = bralenstack[brastackptr];
2255            }
2256          else duplength = 0;
2257    
2258        /* Leave ptr at the final char; for read_repeat_counts this happens        /* Leave ptr at the final char; for read_repeat_counts this happens
2259        automatically; for the others we need an increment. */        automatically; for the others we need an increment. */
2260    
2261        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2, &compile_block))
2262          {          {
2263          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr);          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr,
2264              &compile_block);
2265          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2266          }          }
2267        else if (c == '*') { minval = 0; maxval = -1; ptr++; }        else if (c == '*') { minval = 0; maxval = -1; ptr++; }
# Line 1842  while ((c = *(++ptr)) != 0) Line 2290  while ((c = *(++ptr)) != 0)
2290      runlength = 0;      runlength = 0;
2291      do      do
2292        {        {
2293        if ((pcre_ctypes[c] & ctype_space) != 0)        if ((options & PCRE_EXTENDED) != 0)
         {  
         if ((options & PCRE_EXTENDED) != 0) continue;  
         spaces++;  
         }  
   
       if (c == '#' && (options & PCRE_EXTENDED) != 0)  
2294          {          {
2295          while ((c = *(++ptr)) != 0 && c != '\n');          if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2296          continue;          if (c == '#')
2297              {
2298              while ((c = *(++ptr)) != 0 && c != '\n');
2299              continue;
2300              }
2301          }          }
2302    
2303        /* Backslash may introduce a data char or a metacharacter; stop the        /* Backslash may introduce a data char or a metacharacter; stop the
# Line 1860  while ((c = *(++ptr)) != 0) Line 2306  while ((c = *(++ptr)) != 0)
2306        if (c == '\\')        if (c == '\\')
2307          {          {
2308          const uschar *saveptr = ptr;          const uschar *saveptr = ptr;
2309          c = check_escape(&ptr, errorptr, bracount, options, FALSE);          c = check_escape(&ptr, errorptr, bracount, options, FALSE,
2310              &compile_block);
2311          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2312          if (c < 0) { ptr = saveptr; break; }          if (c < 0) { ptr = saveptr; break; }
2313          }          }
# Line 1872  while ((c = *(++ptr)) != 0) Line 2319  while ((c = *(++ptr)) != 0)
2319    
2320      /* This "while" is the end of the "do" above. */      /* This "while" is the end of the "do" above. */
2321    
2322      while (runlength < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);      while (runlength < 255 &&
2323          (compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0);
2324    
2325      ptr--;      ptr--;
2326      length += runlength;      length += runlength;
# Line 1907  if (re == NULL) Line 2355  if (re == NULL)
2355    
2356  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
2357  re->options = options;  re->options = options;
2358    re->tables = tables;
2359    
2360  /* Set up a starting, non-extracting bracket, then compile the expression. On  /* Set up a starting, non-extracting bracket, then compile the expression. On
2361  error, *errorptr will be set non-NULL, so we don't need to look at the result  error, *errorptr will be set non-NULL, so we don't need to look at the result
# Line 1916  ptr = (const uschar *)pattern; Line 2365  ptr = (const uschar *)pattern;
2365  code = re->code;  code = re->code;
2366  *code = OP_BRA;  *code = OP_BRA;
2367  bracount = 0;  bracount = 0;
2368  (void)compile_regex(options, &bracount, &code, &ptr, errorptr);  (void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1,
2369      &compile_block);
2370  re->top_bracket = bracount;  re->top_bracket = bracount;
2371  re->top_backref = top_backref;  re->top_backref = top_backref;
2372    
# Line 1933  if debugging, leave the test till after Line 2383  if debugging, leave the test till after
2383  if (code - re->code > length) *errorptr = ERR23;  if (code - re->code > length) *errorptr = ERR23;
2384  #endif  #endif
2385    
2386    /* Give an error if there's a back reference to a non-existent capturing
2387    subpattern. */
2388    
2389    if (top_backref > re->top_bracket) *errorptr = ERR15;
2390    
2391  /* Failed to compile */  /* Failed to compile */
2392    
2393  if (*errorptr != NULL)  if (*errorptr != NULL)
# Line 1951  to set the PCRE_STARTLINE flag if all br Line 2406  to set the PCRE_STARTLINE flag if all br
2406    
2407  if ((options & PCRE_ANCHORED) == 0)  if ((options & PCRE_ANCHORED) == 0)
2408    {    {
2409    if (is_anchored(re->code, (options & PCRE_MULTILINE) != 0))    int temp_options = options;
2410      if (is_anchored(re->code, &temp_options))
2411      re->options |= PCRE_ANCHORED;      re->options |= PCRE_ANCHORED;
2412    else    else
2413      {      {
2414      int ch = find_firstchar(re->code);      int ch = find_firstchar(re->code, &temp_options);
2415      if (ch >= 0)      if (ch >= 0)
2416        {        {
2417        re->first_char = ch;        re->first_char = ch;
# Line 1970  if ((options & PCRE_ANCHORED) == 0) Line 2426  if ((options & PCRE_ANCHORED) == 0)
2426    
2427  #ifdef DEBUG  #ifdef DEBUG
2428    
2429  printf("Length = %d top_bracket = %d top_backref=%d\n",  printf("Length = %d top_bracket = %d top_backref = %d\n",
2430    length, re->top_bracket, re->top_backref);    length, re->top_bracket, re->top_backref);
2431    
2432  if (re->options != 0)  if (re->options != 0)
2433    {    {
2434    printf("%s%s%s%s%s%s%s\n",    printf("%s%s%s%s%s%s%s%s\n",
2435      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
2436      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
2437      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
2438      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
2439      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
2440      ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",      ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
2441      ((re->options & PCRE_EXTRA) != 0)? "extra " : "");      ((re->options & PCRE_EXTRA) != 0)? "extra " : "",
2442        ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");
2443    }    }
2444    
2445  if ((re->options & PCRE_FIRSTSET) != 0)  if ((re->options & PCRE_FIRSTSET) != 0)
# Line 2008  while (code < code_end) Line 2465  while (code < code_end)
2465    
2466    else switch(*code)    else switch(*code)
2467      {      {
2468        case OP_OPT:
2469        printf(" %.2x %s", code[1], OP_names[*code]);
2470        code++;
2471        break;
2472    
2473        case OP_COND:
2474        printf("%3d Cond", (code[1] << 8) + code[2]);
2475        code += 2;
2476        break;
2477    
2478        case OP_CREF:
2479        printf(" %.2d %s", code[1], OP_names[*code]);
2480        code++;
2481        break;
2482    
2483      case OP_CHARS:      case OP_CHARS:
2484      charlength = *(++code);      charlength = *(++code);
2485      printf("%3d ", charlength);      printf("%3d ", charlength);
# Line 2021  while (code < code_end) Line 2493  while (code < code_end)
2493      case OP_KET:      case OP_KET:
2494      case OP_ASSERT:      case OP_ASSERT:
2495      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
2496        case OP_ASSERTBACK:
2497        case OP_ASSERTBACK_NOT:
2498      case OP_ONCE:      case OP_ONCE:
2499      printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);      printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
2500      code += 2;      code += 2;
2501      break;      break;
2502    
2503        case OP_REVERSE:
2504        printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
2505        code += 2;
2506        break;
2507    
2508      case OP_STAR:      case OP_STAR:
2509      case OP_MINSTAR:      case OP_MINSTAR:
2510      case OP_PLUS:      case OP_PLUS:
# Line 2099  while (code < code_end) Line 2578  while (code < code_end)
2578      goto CLASS_REF_REPEAT;      goto CLASS_REF_REPEAT;
2579    
2580      case OP_CLASS:      case OP_CLASS:
     case OP_NEGCLASS:  
2581        {        {
2582        int i, min, max;        int i, min, max;
2583          code++;
2584        if (*code++ == OP_CLASS) printf("    [");        printf("    [");
         else printf("   ^[");  
2585    
2586        for (i = 0; i < 256; i++)        for (i = 0; i < 256; i++)
2587          {          {
# Line 2186  return (pcre *)re; Line 2663  return (pcre *)re;
2663    
2664    
2665  /*************************************************  /*************************************************
 *        Match a character type                  *  
 *************************************************/  
   
 /* Not used in all the places it might be as it's sometimes faster  
 to put the code inline.  
   
 Arguments:  
   type        the character type  
   c           the character  
   dotall      the dotall flag  
   
 Returns:      TRUE if character is of the type  
 */  
   
 static BOOL  
 match_type(int type, int c, BOOL dotall)  
 {  
   
 #ifdef DEBUG  
 if (isprint(c)) printf("matching subject %c against ", c);  
   else printf("matching subject \\x%02x against ", c);  
 printf("%s\n", OP_names[type]);  
 #endif  
   
 switch(type)  
   {  
   case OP_ANY:            return dotall || c != '\n';  
   case OP_NOT_DIGIT:      return (pcre_ctypes[c] & ctype_digit) == 0;  
   case OP_DIGIT:          return (pcre_ctypes[c] & ctype_digit) != 0;  
   case OP_NOT_WHITESPACE: return (pcre_ctypes[c] & ctype_space) == 0;  
   case OP_WHITESPACE:     return (pcre_ctypes[c] & ctype_space) != 0;  
   case OP_NOT_WORDCHAR:   return (pcre_ctypes[c] & ctype_word) == 0;  
   case OP_WORDCHAR:       return (pcre_ctypes[c] & ctype_word) != 0;  
   }  
 return FALSE;  
 }  
   
   
   
 /*************************************************  
2666  *          Match a back-reference                *  *          Match a back-reference                *
2667  *************************************************/  *************************************************/
2668    
2669  /* If a back reference hasn't been set, the match fails.  /* If a back reference hasn't been set, the length that is passed is greater
2670    than the number of characters left in the string, so the match fails.
2671    
2672  Arguments:  Arguments:
2673    number      reference number    offset      index into the offset vector
2674    eptr        points into the subject    eptr        points into the subject
2675    length      length to be matched    length      length to be matched
2676    md          points to match data block    md          points to match data block
2677      ims         the ims flags
2678    
2679  Returns:      TRUE if matched  Returns:      TRUE if matched
2680  */  */
2681    
2682  static BOOL  static BOOL
2683  match_ref(int number, register const uschar *eptr, int length, match_data *md)  match_ref(int offset, register const uschar *eptr, int length, match_data *md,
2684      int ims)
2685  {  {
2686  const uschar *p = md->start_subject + md->offset_vector[number];  const uschar *p = md->start_subject + md->offset_vector[offset];
2687    
2688  #ifdef DEBUG  #ifdef DEBUG
2689  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 2260  printf("\n"); Line 2700  printf("\n");
2700    
2701  /* Always fail if not enough characters left */  /* Always fail if not enough characters left */
2702    
2703  if (length > md->end_subject - p) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
2704    
2705  /* Separate the caseless case for speed */  /* Separate the caseless case for speed */
2706    
2707  if (md->caseless)  if ((ims & PCRE_CASELESS) != 0)
2708    { while (length-- > 0) if (pcre_lcc[*p++] != pcre_lcc[*eptr++]) return FALSE; }    {
2709      while (length-- > 0)
2710        if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
2711      }
2712  else  else
2713    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
2714    
# Line 2278  return TRUE; Line 2721  return TRUE;
2721  *         Match from current position            *  *         Match from current position            *
2722  *************************************************/  *************************************************/
2723    
2724  /* On entry ecode points to the first opcode, and eptr to the first character.  /* On entry ecode points to the first opcode, and eptr to the first character
2725    in the subject string, while eptrb holds the value of eptr at the start of the
2726    last bracketed group - used for breaking infinite loops matching zero-length
2727    strings.
2728    
2729  Arguments:  Arguments:
2730     eptr        pointer in subject     eptr        pointer in subject
2731     ecode       position in code     ecode       position in code
2732     offset_top  current top pointer     offset_top  current top pointer
2733     md          pointer to "static" info for the match     md          pointer to "static" info for the match
2734       ims         current /i, /m, and /s options
2735       condassert  TRUE if called to check a condition assertion
2736       eptrb       eptr at start of last bracket
2737    
2738  Returns:       TRUE if matched  Returns:       TRUE if matched
2739  */  */
2740    
2741  static BOOL  static BOOL
2742  match(register const uschar *eptr, register const uschar *ecode, int offset_top,  match(register const uschar *eptr, register const uschar *ecode,
2743    match_data *md)    int offset_top, match_data *md, int ims, BOOL condassert, const uschar *eptrb)
2744  {  {
2745    int original_ims = ims;   /* Save for resetting on ')' */
2746    
2747  for (;;)  for (;;)
2748    {    {
2749      int op = (int)*ecode;
2750    int min, max, ctype;    int min, max, ctype;
2751    register int i;    register int i;
2752    register int c;    register int c;
2753    BOOL minimize = FALSE;    BOOL minimize = FALSE;
2754    
2755    /* Opening bracket. Check the alternative branches in turn, failing if none    /* Opening capturing bracket. If there is space in the offset vector, save
2756    match. We have to set the start offset if required and there is space    the current subject position in the working slot at the top of the vector. We
2757    in the offset vector so that it is available for subsequent back references    mustn't change the current values of the data slot, because they may be set
2758    if the bracket matches. However, if the bracket fails, we must put back the    from a previous iteration of this group, and be referred to by a reference
2759    previous value of both offsets in case they were set by a previous copy of    inside the group.
2760    the same bracket. Don't worry about setting the flag for the error case here;  
2761    that is handled in the code for KET. */    If the bracket fails to match, we need to restore this value and also the
2762      values of the final offsets, in case they were set by a previous iteration of
2763      the same bracket.
2764    
2765      If there isn't enough space in the offset vector, treat this as if it were a
2766      non-capturing bracket. Don't worry about setting the flag for the error case
2767      here; that is handled in the code for KET. */
2768    
2769    if ((int)*ecode >= OP_BRA)    if (op > OP_BRA)
2770      {      {
2771      int number = (*ecode - OP_BRA) << 1;      int number = op - OP_BRA;
2772      int save_offset1 = 0, save_offset2 = 0;      int offset = number << 1;
2773    
2774      DPRINTF(("start bracket %d\n", number/2));      DPRINTF(("start bracket %d\n", number));
2775    
2776      if (number > 0 && number < md->offset_end)      if (offset < md->offset_max)
2777        {        {
2778        save_offset1 = md->offset_vector[number];        int save_offset1 = md->offset_vector[offset];
2779        save_offset2 = md->offset_vector[number+1];        int save_offset2 = md->offset_vector[offset+1];
2780        md->offset_vector[number] = eptr - md->start_subject;        int save_offset3 = md->offset_vector[md->offset_end - number];
2781    
2782        DPRINTF(("saving %d %d\n", save_offset1, save_offset2));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
2783          md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
2784    
2785          do
2786            {
2787            if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
2788            ecode += (ecode[1] << 8) + ecode[2];
2789            }
2790          while (*ecode == OP_ALT);
2791    
2792          DPRINTF(("bracket %d failed\n", number));
2793    
2794          md->offset_vector[offset] = save_offset1;
2795          md->offset_vector[offset+1] = save_offset2;
2796          md->offset_vector[md->offset_end - number] = save_offset3;
2797          return FALSE;
2798        }        }
2799    
2800      /* Recurse for all the alternatives. */      /* Insufficient room for saving captured contents */
2801    
2802        else op = OP_BRA;
2803        }
2804    
2805      /* Other types of node can be handled by a switch */
2806    
2807      switch(op)
2808        {
2809        case OP_BRA:     /* Non-capturing bracket: optimized */
2810        DPRINTF(("start bracket 0\n"));
2811      do      do
2812        {        {
2813        if (match(eptr, ecode+3, offset_top, md)) return TRUE;        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
2814        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
2815        }        }
2816      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
2817        DPRINTF(("bracket 0 failed\n"));
2818        return FALSE;
2819    
2820      DPRINTF(("bracket %d failed\n", number/2));      /* Conditional group: compilation checked that there are no more than
2821        two branches. If the condition is false, skipping the first branch takes us
2822        past the end if there is only one branch, but that's OK because that is
2823        exactly what going to the ket would do. */
2824    
2825        case OP_COND:
2826        if (ecode[3] == OP_CREF)         /* Condition is extraction test */
2827          {
2828          int offset = ecode[4] << 1;    /* Doubled reference number */
2829          return match(eptr,
2830            ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)?
2831              5 : 3 + (ecode[1] << 8) + ecode[2]),
2832            offset_top, md, ims, FALSE, eptr);
2833          }
2834    
2835        /* The condition is an assertion. Call match() to evaluate it - setting
2836        the final argument TRUE causes it to stop at the end of an assertion. */
2837    
2838      if (number > 0 && number < md->offset_end)      else
2839        {        {
2840        md->offset_vector[number] = save_offset1;        if (match(eptr, ecode+3, offset_top, md, ims, TRUE, NULL))
2841        md->offset_vector[number+1] = save_offset2;          {
2842            ecode += 3 + (ecode[4] << 8) + ecode[5];
2843            while (*ecode == OP_ALT) ecode += (ecode[1] << 8) + ecode[2];
2844            }
2845          else ecode += (ecode[1] << 8) + ecode[2];
2846          return match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr);
2847        }        }
2848        /* Control never reaches here */
2849    
2850      return FALSE;      /* Skip over conditional reference data if encountered (should not be) */
     }  
2851    
2852    /* Other types of node can be handled by a switch */      case OP_CREF:
2853        ecode += 2;
2854        break;
2855    
2856        /* End of the pattern */
2857    
   switch(*ecode)  
     {  
2858      case OP_END:      case OP_END:
2859      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;          /* Record where we ended */
2860      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;   /* and how many extracts were taken */
2861      return TRUE;      return TRUE;
2862    
2863      /* The equivalent of Prolog's "cut" - if the rest doesn't match, the      /* Change option settings */
     whole thing doesn't match, so we have to get out via a longjmp(). */  
2864    
2865      case OP_CUT:      case OP_OPT:
2866      if (match(eptr, ecode+1, offset_top, md)) return TRUE;      ims = ecode[1];
2867      longjmp(md->fail_env, 1);      ecode += 2;
2868        DPRINTF(("ims set to %02x\n", ims));
2869        break;
2870    
2871      /* Assertion brackets. Check the alternative branches in turn - the      /* Assertion brackets. Check the alternative branches in turn - the
2872      matching won't pass the KET for an assertion. If any one branch matches,      matching won't pass the KET for an assertion. If any one branch matches,
2873      the assertion is true. */      the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
2874        start of each branch to move the current point backwards, so the code at
2875        this level is identical to the lookahead case. */
2876    
2877      case OP_ASSERT:      case OP_ASSERT:
2878        case OP_ASSERTBACK:
2879      do      do
2880        {        {
2881        if (match(eptr, ecode+3, offset_top, md)) break;        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, NULL)) break;
2882        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
2883        }        }
2884      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
2885      if (*ecode == OP_KET) return FALSE;      if (*ecode == OP_KET) return FALSE;
2886    
2887        /* If checking an assertion for a condition, return TRUE. */
2888    
2889        if (condassert) return TRUE;
2890    
2891      /* Continue from after the assertion, updating the offsets high water      /* Continue from after the assertion, updating the offsets high water
2892      mark, since extracts may have been taken during the assertion. */      mark, since extracts may have been taken during the assertion. */
2893    
# Line 2384  for (;;) Line 2899  for (;;)
2899      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match */
2900    
2901      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
2902        case OP_ASSERTBACK_NOT:
2903      do      do
2904        {        {
2905        if (match(eptr, ecode+3, offset_top, md)) return FALSE;        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, NULL)) return FALSE;
2906        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
2907        }        }
2908      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
2909    
2910        if (condassert) return TRUE;
2911      ecode += 3;      ecode += 3;
2912      continue;      continue;
2913    
2914        /* Move the subject pointer back. This occurs only at the start of
2915        each branch of a lookbehind assertion. If we are too close to the start to
2916        move back, this match function fails. */
2917    
2918        case OP_REVERSE:
2919        eptr -= (ecode[1] << 8) + ecode[2];
2920        if (eptr < md->start_subject) return FALSE;
2921        ecode += 3;
2922        break;
2923    
2924    
2925      /* "Once" brackets are like assertion brackets except that after a match,      /* "Once" brackets are like assertion brackets except that after a match,
2926      the point in the subject string is not moved back. Thus there can never be      the point in the subject string is not moved back. Thus there can never be
2927      a move back into the brackets. Check the alternative branches in turn - the      a move back into the brackets. Check the alternative branches in turn - the
2928      matching won't pass the KET for this kind of subpattern. If any one branch      matching won't pass the KET for this kind of subpattern. If any one branch
2929      matches, we carry on, leaving the subject pointer. */      matches, we carry on as at the end of a normal bracket, leaving the subject
2930        pointer. */
2931    
2932      case OP_ONCE:      case OP_ONCE:
     do  
2933        {        {
2934        if (match(eptr, ecode+3, offset_top, md)) break;        const uschar *prev = ecode;
       ecode += (ecode[1] << 8) + ecode[2];  
       }  
     while (*ecode == OP_ALT);  
     if (*ecode == OP_KET) return FALSE;  
2935    
2936      /* Continue as from after the assertion, updating the offsets high water        do
2937      mark, since extracts may have been taken. */          {
2938            if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) break;
2939            ecode += (ecode[1] << 8) + ecode[2];
2940            }
2941          while (*ecode == OP_ALT);
2942    
2943      do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);        /* If hit the end of the group (which could be repeated), fail */
2944      ecode += 3;  
2945      offset_top = md->end_offset_top;        if (*ecode != OP_ONCE && *ecode != OP_ALT) return FALSE;
2946      eptr = md->end_match_ptr;  
2947      continue;        /* Continue as from after the assertion, updating the offsets high water
2948          mark, since extracts may have been taken. */
2949    
2950          do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
2951    
2952          offset_top = md->end_offset_top;
2953          eptr = md->end_match_ptr;
2954    
2955          /* For a non-repeating ket, just continue at this level. This also
2956          happens for a repeating ket if no characters were matched in the group.
2957          This is the forcible breaking of infinite loops as implemented in Perl
2958          5.005. If there is an options reset, it will get obeyed in the normal
2959          course of events. */
2960    
2961          if (*ecode == OP_KET || eptr == eptrb)
2962            {
2963            ecode += 3;
2964            break;
2965            }
2966    
2967          /* The repeating kets try the rest of the pattern or restart from the
2968          preceding bracket, in the appropriate order. We need to reset any options
2969          that changed within the bracket before re-running it, so check the next
2970          opcode. */
2971    
2972          if (ecode[3] == OP_OPT)
2973            {
2974            ims = (ims & ~PCRE_IMS) | ecode[4];
2975            DPRINTF(("ims set to %02x at group repeat\n", ims));
2976            }
2977    
2978          if (*ecode == OP_KETRMIN)
2979            {
2980            if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr) ||
2981                match(eptr, prev, offset_top, md, ims, FALSE, eptr)) return TRUE;
2982            }
2983          else  /* OP_KETRMAX */
2984            {
2985            if (match(eptr, prev, offset_top, md, ims, FALSE, eptr) ||
2986                match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
2987            }
2988          }
2989        return FALSE;
2990    
2991      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
2992      bracketed group and go to there. */      bracketed group and go to there. */
# Line 2433  for (;;) Line 3004  for (;;)
3004      case OP_BRAZERO:      case OP_BRAZERO:
3005        {        {
3006        const uschar *next = ecode+1;        const uschar *next = ecode+1;
3007        if (match(eptr, next, offset_top, md)) return TRUE;        if (match(eptr, next, offset_top, md, ims, FALSE, eptr)) return TRUE;
3008        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3009        ecode = next + 3;        ecode = next + 3;
3010        }        }
# Line 2443  for (;;) Line 3014  for (;;)
3014        {        {
3015        const uschar *next = ecode+1;        const uschar *next = ecode+1;
3016        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3017        if (match(eptr, next+3, offset_top, md)) return TRUE;        if (match(eptr, next+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3018        ecode++;        ecode++;
3019        }        }
3020      break;;      break;
3021    
3022      /* End of a group, repeated or non-repeating. If we are at the end of      /* End of a group, repeated or non-repeating. If we are at the end of
3023      an assertion "group", stop matching and return TRUE, but record the      an assertion "group", stop matching and return TRUE, but record the
3024      current high water mark for use by positive assertions. */      current high water mark for use by positive assertions. Do this also
3025        for the "once" (no-backing-up) groups. */
3026    
3027      case OP_KET:      case OP_KET:
3028      case OP_KETRMIN:      case OP_KETRMIN:
3029      case OP_KETRMAX:      case OP_KETRMAX:
3030        {        {
       int number;  
3031        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
3032    
3033        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
3034              *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
3035              *prev == OP_ONCE)
3036          {          {
3037          md->end_match_ptr = eptr;      /* For ONCE */          md->end_match_ptr = eptr;      /* For ONCE */
3038          md->end_offset_top = offset_top;          md->end_offset_top = offset_top;
3039          return TRUE;          return TRUE;
3040          }          }
3041    
3042        /* In all other cases we have to check the group number back at the        /* In all other cases except a conditional group we have to check the
3043        start and if necessary complete handling an extraction by setting the        group number back at the start and if necessary complete handling an
3044        final offset and bumping the high water mark. */        extraction by setting the offsets and bumping the high water mark. */
3045    
3046        number = (*prev - OP_BRA) << 1;        if (*prev != OP_COND)
3047            {
3048            int number = *prev - OP_BRA;
3049            int offset = number << 1;
3050    
3051        DPRINTF(("end bracket %d\n", number/2));          DPRINTF(("end bracket %d\n", number));
3052    
3053        if (number > 0)          if (number > 0)
         {  
         if (number >= md->offset_end) md->offset_overflow = TRUE; else  
3054            {            {
3055            md->offset_vector[number+1] = eptr - md->start_subject;            if (offset >= md->offset_max) md->offset_overflow = TRUE; else
3056            if (offset_top <= number) offset_top = number + 2;              {
3057                md->offset_vector[offset] =
3058                  md->offset_vector[md->offset_end - number];
3059                md->offset_vector[offset+1] = eptr - md->start_subject;
3060                if (offset_top <= offset) offset_top = offset + 2;
3061                }
3062            }            }
3063          }          }
3064    
3065        /* For a non-repeating ket, just advance to the next node and continue at        /* Reset the value of the ims flags, in case they got changed during
3066        this level. */        the group. */
3067    
3068          ims = original_ims;
3069          DPRINTF(("ims reset to %02x\n", ims));
3070    
3071        if (*ecode == OP_KET)        /* For a non-repeating ket, just continue at this level. This also
3072          happens for a repeating ket if no characters were matched in the group.
3073          This is the forcible breaking of infinite loops as implemented in Perl
3074          5.005. If there is an options reset, it will get obeyed in the normal
3075          course of events. */
3076    
3077          if (*ecode == OP_KET || eptr == eptrb)
3078          {          {
3079          ecode += 3;          ecode += 3;
3080          break;          break;
# Line 2497  for (;;) Line 3085  for (;;)
3085    
3086        if (*ecode == OP_KETRMIN)        if (*ecode == OP_KETRMIN)
3087          {          {
3088          if (match(eptr, ecode+3, offset_top, md) ||          if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr) ||
3089              match(eptr, prev, offset_top, md)) return TRUE;              match(eptr, prev, offset_top, md, ims, FALSE, eptr)) return TRUE;
3090          }          }
3091        else  /* OP_KETRMAX */        else  /* OP_KETRMAX */
3092          {          {
3093          if (match(eptr, prev, offset_top, md) ||          if (match(eptr, prev, offset_top, md, ims, FALSE, eptr) ||
3094              match(eptr, ecode+3, offset_top, md)) return TRUE;              match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3095          }          }
3096        }        }
3097      return FALSE;      return FALSE;
# Line 2512  for (;;) Line 3100  for (;;)
3100    
3101      case OP_CIRC:      case OP_CIRC:
3102      if (md->notbol && eptr == md->start_subject) return FALSE;      if (md->notbol && eptr == md->start_subject) return FALSE;
3103      if (md->multiline)      if ((ims & PCRE_MULTILINE) != 0)
3104        {        {
3105        if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;        if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;
3106        ecode++;        ecode++;
# Line 2527  for (;;) Line 3115  for (;;)
3115      ecode++;      ecode++;
3116      break;      break;
3117    
3118      /* Assert before internal newline if multiline, or before      /* Assert before internal newline if multiline, or before a terminating
3119      a terminating newline unless endonly is set, else end of subject unless      newline unless endonly is set, else end of subject unless noteol is set. */
     noteol is set. */  
3120    
3121      case OP_DOLL:      case OP_DOLL:
3122      if (md->noteol && eptr >= md->end_subject) return FALSE;      if ((ims & PCRE_MULTILINE) != 0)
     if (md->multiline)  
3123        {        {
3124        if (eptr < md->end_subject && *eptr != '\n') return FALSE;        if (eptr < md->end_subject) { if (*eptr != '\n') return FALSE; }
3125            else { if (md->noteol) return FALSE; }
3126        ecode++;        ecode++;
3127        break;        break;
3128        }        }
3129      else if (!md->endonly)      else
3130        {        {
3131        if (eptr < md->end_subject - 1 ||        if (md->noteol) return FALSE;
3132           (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;        if (!md->endonly)
3133        ecode++;          {
3134        break;          if (eptr < md->end_subject - 1 ||
3135               (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
3136    
3137            ecode++;
3138            break;
3139            }
3140        }        }
3141      /* ... else fall through */      /* ... else fall through */
3142    
3143      /* End of subject assertion */      /* End of subject assertion (\z) */
3144    
3145      case OP_EOD:      case OP_EOD:
3146      if (eptr < md->end_subject) return FALSE;      if (eptr < md->end_subject) return FALSE;
3147      ecode++;      ecode++;
3148      break;      break;
3149    
3150        /* End of subject or ending \n assertion (\Z) */
3151    
3152        case OP_EODN:
3153        if (eptr < md->end_subject - 1 ||
3154           (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
3155        ecode++;
3156        break;
3157    
3158      /* Word boundary assertions */      /* Word boundary assertions */
3159    
3160      case OP_NOT_WORD_BOUNDARY:      case OP_NOT_WORD_BOUNDARY:
3161      case OP_WORD_BOUNDARY:      case OP_WORD_BOUNDARY:
3162        {        {
3163        BOOL prev_is_word = (eptr != md->start_subject) &&        BOOL prev_is_word = (eptr != md->start_subject) &&
3164          ((pcre_ctypes[eptr[-1]] & ctype_word) != 0);          ((md->ctypes[eptr[-1]] & ctype_word) != 0);
3165        BOOL cur_is_word = (eptr < md->end_subject) &&        BOOL cur_is_word = (eptr < md->end_subject) &&
3166          ((pcre_ctypes[*eptr] & ctype_word) != 0);          ((md->ctypes[*eptr] & ctype_word) != 0);
3167        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
3168             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
3169          return FALSE;          return FALSE;
# Line 2573  for (;;) Line 3173  for (;;)
3173      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
3174    
3175      case OP_ANY:      case OP_ANY:
3176      if (!md->dotall && eptr < md->end_subject && *eptr == '\n') return FALSE;      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n')
3177          return FALSE;
3178      if (eptr++ >= md->end_subject) return FALSE;      if (eptr++ >= md->end_subject) return FALSE;
3179      ecode++;      ecode++;
3180      break;      break;
3181    
3182      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
3183      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) != 0)      if (eptr >= md->end_subject ||
3184           (md->ctypes[*eptr++] & ctype_digit) != 0)
3185        return FALSE;        return FALSE;
3186      ecode++;      ecode++;
3187      break;      break;
3188    
3189      case OP_DIGIT:      case OP_DIGIT:
3190      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) == 0)      if (eptr >= md->end_subject ||
3191           (md->ctypes[*eptr++] & ctype_digit) == 0)
3192        return FALSE;        return FALSE;
3193      ecode++;      ecode++;
3194      break;      break;
3195    
3196      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
3197      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) != 0)      if (eptr >= md->end_subject ||
3198           (md->ctypes[*eptr++] & ctype_space) != 0)
3199        return FALSE;        return FALSE;
3200      ecode++;      ecode++;
3201      break;      break;
3202    
3203      case OP_WHITESPACE:      case OP_WHITESPACE:
3204      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) == 0)      if (eptr >= md->end_subject ||
3205           (md->ctypes[*eptr++] & ctype_space) == 0)
3206        return FALSE;        return FALSE;
3207      ecode++;      ecode++;
3208      break;      break;
3209    
3210      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
3211      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) != 0)      if (eptr >= md->end_subject ||
3212           (md->ctypes[*eptr++] & ctype_word) != 0)
3213        return FALSE;        return FALSE;
3214      ecode++;      ecode++;
3215      break;      break;
3216    
3217      case OP_WORDCHAR:      case OP_WORDCHAR:
3218      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) == 0)      if (eptr >= md->end_subject ||
3219           (md->ctypes[*eptr++] & ctype_word) == 0)
3220        return FALSE;        return FALSE;
3221      ecode++;      ecode++;
3222      break;      break;
# Line 2625  for (;;) Line 3232  for (;;)
3232      case OP_REF:      case OP_REF:
3233        {        {
3234        int length;        int length;
3235        int number = ecode[1] << 1;                /* Doubled reference number */        int offset = ecode[1] << 1;                /* Doubled reference number */
3236        ecode += 2;                                /* Advance past the item */        ecode += 2;                                /* Advance past the item */
3237    
3238        if (number >= offset_top || md->offset_vector[number] < 0)        /* If the reference is unset, set the length to be longer than the amount
3239          {        of subject left; this ensures that every attempt at a match fails. We
3240          md->errorcode = PCRE_ERROR_BADREF;        can't just fail here, because of the possibility of quantifiers with zero
3241          return FALSE;        minima. */
3242          }  
3243          length = (offset >= offset_top || md->offset_vector[offset] < 0)?
3244            md->end_subject - eptr + 1 :
3245            md->offset_vector[offset+1] - md->offset_vector[offset];
3246    
3247        length = md->offset_vector[number+1] - md->offset_vector[number];        /* Set up for repetition, or handle the non-repeated case */
3248    
3249        switch (*ecode)        switch (*ecode)
3250          {          {
# Line 2661  for (;;) Line 3271  for (;;)
3271          break;          break;
3272    
3273          default:               /* No repeat follows */          default:               /* No repeat follows */
3274          if (!match_ref(number, eptr, length, md)) return FALSE;          if (!match_ref(offset, eptr, length, md, ims)) return FALSE;
3275          eptr += length;          eptr += length;
3276          continue;              /* With the main loop */          continue;              /* With the main loop */
3277          }          }
# Line 2677  for (;;) Line 3287  for (;;)
3287    
3288        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3289          {          {
3290          if (!match_ref(number, eptr, length, md)) return FALSE;          if (!match_ref(offset, eptr, length, md, ims)) return FALSE;
3291          eptr += length;          eptr += length;
3292          }          }
3293    
# Line 2692  for (;;) Line 3302  for (;;)
3302          {          {
3303          for (i = min;; i++)          for (i = min;; i++)
3304            {            {
3305            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3306            if (i >= max || !match_ref(number, eptr, length, md))              return TRUE;
3307              if (i >= max || !match_ref(offset, eptr, length, md, ims))
3308              return FALSE;              return FALSE;
3309            eptr += length;            eptr += length;
3310            }            }
# Line 2707  for (;;) Line 3318  for (;;)
3318          const uschar *pp = eptr;          const uschar *pp = eptr;
3319          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3320            {            {
3321            if (!match_ref(number, eptr, length, md)) break;            if (!match_ref(offset, eptr, length, md, ims)) break;
3322            eptr += length;            eptr += length;
3323            }            }
3324          while (eptr >= pp)          while (eptr >= pp)
3325            {            {
3326            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3327                return TRUE;
3328            eptr -= length;            eptr -= length;
3329            }            }
3330          return FALSE;          return FALSE;
# Line 2720  for (;;) Line 3332  for (;;)
3332        }        }
3333      /* Control never gets here */      /* Control never gets here */
3334    
3335    
3336    
3337      /* Match a character class, possibly repeatedly. Look past the end of the      /* Match a character class, possibly repeatedly. Look past the end of the
3338      item to see if there is repeat information following. Then obey similar      item to see if there is repeat information following. Then obey similar
3339      code to character type repeats - written out again for speed. If caseless      code to character type repeats - written out again for speed. */
     matching was set at runtime but not at compile time, we have to check both  
     versions of a character, and we have to behave differently for positive and  
     negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are  
     treated differently. */  
3340    
3341      case OP_CLASS:      case OP_CLASS:
     case OP_NEGCLASS:  
3342        {        {
       BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless;  
3343        const uschar *data = ecode + 1;  /* Save for matching */        const uschar *data = ecode + 1;  /* Save for matching */
3344        ecode += 33;                     /* Advance past the item */        ecode += 33;                     /* Advance past the item */
3345    
# Line 2770  for (;;) Line 3378  for (;;)
3378          {          {
3379          if (eptr >= md->end_subject) return FALSE;          if (eptr >= md->end_subject) return FALSE;
3380          c = *eptr++;          c = *eptr++;
3381            if ((data[c/8] & (1 << (c&7))) != 0) continue;
         /* Either not runtime caseless, or it was a positive class. For  
         runtime caseless, continue if either case is in the map. */  
   
         if (!nasty_case)  
           {  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  
           if (md->runtime_caseless)  
             {  
             c = pcre_fcc[c];  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             }  
           }  
   
         /* Runtime caseless and it was a negative class. Continue only if  
         both cases are in the map. */  
   
         else  
           {  
           if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;  
           c = pcre_fcc[c];  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  
           }  
   
3382          return FALSE;          return FALSE;
3383          }          }
3384    
# Line 2809  for (;;) Line 3394  for (;;)
3394          {          {
3395          for (i = min;; i++)          for (i = min;; i++)
3396            {            {
3397            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3398                return TRUE;
3399            if (i >= max || eptr >= md->end_subject) return FALSE;            if (i >= max || eptr >= md->end_subject) return FALSE;
3400            c = *eptr++;            c = *eptr++;
3401              if ((data[c/8] & (1 << (c&7))) != 0) continue;
           /* Either not runtime caseless, or it was a positive class. For  
           runtime caseless, continue if either case is in the map. */  
   
           if (!nasty_case)  
             {  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             if (md->runtime_caseless)  
               {  
               c = pcre_fcc[c];  
               if ((data[c/8] & (1 << (c&7))) != 0) continue;  
               }  
             }  
   
           /* Runtime caseless and it was a negative class. Continue only if  
           both cases are in the map. */  
   
           else  
             {  
             if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;  
             c = pcre_fcc[c];  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             }  
   
3402            return FALSE;            return FALSE;
3403            }            }
3404          /* Control never gets here */          /* Control never gets here */
# Line 2850  for (;;) Line 3413  for (;;)
3413            {            {
3414            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3415            c = *eptr;            c = *eptr;
3416              if ((data[c/8] & (1 << (c&7))) != 0) continue;
           /* Either not runtime caseless, or it was a positive class. For  
           runtime caseless, continue if either case is in the map. */  
   
           if (!nasty_case)  
             {  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             if (md->runtime_caseless)  
               {  
               c = pcre_fcc[c];  
               if ((data[c/8] & (1 << (c&7))) != 0) continue;  
               }  
             }  
   
           /* Runtime caseless and it was a negative class. Continue only if  
           both cases are in the map. */  
   
           else  
             {  
             if ((data[c/8] & (1 << (c&7))) == 0) break;  
             c = pcre_fcc[c];  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             }  
   
3417            break;            break;
3418            }            }
3419    
3420          while (eptr >= pp)          while (eptr >= pp)
3421            if (match(eptr--, ecode, offset_top, md)) return TRUE;            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3422                return TRUE;
3423          return FALSE;          return FALSE;
3424          }          }
3425        }        }
# Line 2905  for (;;) Line 3446  for (;;)
3446  #endif  #endif
3447    
3448        if (length > md->end_subject - eptr) return FALSE;        if (length > md->end_subject - eptr) return FALSE;
3449        if (md->caseless)        if ((ims & PCRE_CASELESS) != 0)
3450          {          {
3451          while (length-- > 0) if (pcre_lcc[*ecode++] != pcre_lcc[*eptr++]) return FALSE;          while (length-- > 0)
3452              if (md->lcc[*ecode++] != md->lcc[*eptr++])
3453                return FALSE;
3454          }          }
3455        else        else
3456          {          {
# Line 2962  for (;;) Line 3505  for (;;)
3505      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
3506        max, eptr));        max, eptr));
3507    
3508      if (md->caseless)      if ((ims & PCRE_CASELESS) != 0)
3509        {        {
3510        c = pcre_lcc[c];        c = md->lcc[c];
3511        for (i = 1; i <= min; i++) if (c != pcre_lcc[*eptr++]) return FALSE;        for (i = 1; i <= min; i++)
3512            if (c != md->lcc[*eptr++]) return FALSE;
3513        if (min == max) continue;        if (min == max) continue;
3514        if (minimize)        if (minimize)
3515          {          {
3516          for (i = min;; i++)          for (i = min;; i++)
3517            {            {
3518            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3519            if (i >= max || eptr >= md->end_subject || c != pcre_lcc[*eptr++])              return TRUE;
3520              if (i >= max || eptr >= md->end_subject ||
3521                  c != md->lcc[*eptr++])
3522              return FALSE;              return FALSE;
3523            }            }
3524          /* Control never gets here */          /* Control never gets here */
# Line 2982  for (;;) Line 3528  for (;;)
3528          const uschar *pp = eptr;          const uschar *pp = eptr;
3529          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3530            {            {
3531            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c != md->lcc[*eptr]) break;
3532            eptr++;            eptr++;
3533            }            }
3534          while (eptr >= pp)          while (eptr >= pp)
3535            if (match(eptr--, ecode, offset_top, md)) return TRUE;            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3536                return TRUE;
3537          return FALSE;          return FALSE;
3538          }          }
3539        /* Control never gets here */        /* Control never gets here */
# Line 3002  for (;;) Line 3549  for (;;)
3549          {          {
3550          for (i = min;; i++)          for (i = min;; i++)
3551            {            {
3552            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3553                return TRUE;
3554            if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;            if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;
3555            }            }
3556          /* Control never gets here */          /* Control never gets here */
# Line 3016  for (;;) Line 3564  for (;;)
3564            eptr++;            eptr++;
3565            }            }
3566          while (eptr >= pp)          while (eptr >= pp)
3567           if (match(eptr--, ecode, offset_top, md)) return TRUE;           if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3568               return TRUE;
3569          return FALSE;          return FALSE;
3570          }          }
3571        }        }
# Line 3027  for (;;) Line 3576  for (;;)
3576      case OP_NOT:      case OP_NOT:
3577      if (eptr >= md->end_subject) return FALSE;      if (eptr >= md->end_subject) return FALSE;
3578      ecode++;      ecode++;
3579      if (md->caseless)      if ((ims & PCRE_CASELESS) != 0)
3580        {        {
3581        if (pcre_lcc[*ecode++] == pcre_lcc[*eptr++]) return FALSE;        if (md->lcc[*ecode++] == md->lcc[*eptr++]) return FALSE;
3582        }        }
3583      else      else
3584        {        {
# Line 3087  for (;;) Line 3636  for (;;)
3636      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
3637        max, eptr));        max, eptr));
3638    
3639      if (md->caseless)      if ((ims & PCRE_CASELESS) != 0)
3640        {        {
3641        c = pcre_lcc[c];        c = md->lcc[c];
3642        for (i = 1; i <= min; i++) if (c == pcre_lcc[*eptr++]) return FALSE;        for (i = 1; i <= min; i++)
3643            if (c == md->lcc[*eptr++]) return FALSE;
3644        if (min == max) continue;        if (min == max) continue;
3645        if (minimize)        if (minimize)
3646          {          {
3647          for (i = min;; i++)          for (i = min;; i++)
3648            {            {
3649            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3650            if (i >= max || eptr >= md->end_subject || c == pcre_lcc[*eptr++])              return TRUE;
3651              if (i >= max || eptr >= md->end_subject ||
3652                  c == md->lcc[*eptr++])
3653              return FALSE;              return FALSE;
3654            }            }
3655          /* Control never gets here */          /* Control never gets here */
# Line 3107  for (;;) Line 3659  for (;;)
3659          const uschar *pp = eptr;          const uschar *pp = eptr;
3660          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3661            {            {
3662            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c == md->lcc[*eptr]) break;
3663            eptr++;            eptr++;
3664            }            }
3665          while (eptr >= pp)          while (eptr >= pp)
3666            if (match(eptr--, ecode, offset_top, md)) return TRUE;            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3667                return TRUE;
3668          return FALSE;          return FALSE;
3669          }          }
3670        /* Control never gets here */        /* Control never gets here */
# Line 3127  for (;;) Line 3680  for (;;)
3680          {          {
3681          for (i = min;; i++)          for (i = min;; i++)
3682            {            {
3683            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3684                return TRUE;
3685            if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;            if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;
3686            }            }
3687          /* Control never gets here */          /* Control never gets here */
# Line 3141  for (;;) Line 3695  for (;;)
3695            eptr++;            eptr++;
3696            }            }
3697          while (eptr >= pp)          while (eptr >= pp)
3698           if (match(eptr--, ecode, offset_top, md)) return TRUE;           if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3699               return TRUE;
3700          return FALSE;          return FALSE;
3701          }          }
3702        }        }
# Line 3191  for (;;) Line 3746  for (;;)
3746      if (min > 0) switch(ctype)      if (min > 0) switch(ctype)
3747        {        {
3748        case OP_ANY:        case OP_ANY:
3749        if (!md->dotall)        if ((ims & PCRE_DOTALL) == 0)
3750          { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }          { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }
3751        else eptr += min;        else eptr += min;
3752        break;        break;
3753    
3754        case OP_NOT_DIGIT:        case OP_NOT_DIGIT:
3755        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3756          if ((pcre_ctypes[*eptr++] & ctype_digit) != 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_digit) != 0) return FALSE;
3757        break;        break;
3758    
3759        case OP_DIGIT:        case OP_DIGIT:
3760        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3761          if ((pcre_ctypes[*eptr++] & ctype_digit) == 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_digit) == 0) return FALSE;
3762        break;        break;
3763    
3764        case OP_NOT_WHITESPACE:        case OP_NOT_WHITESPACE:
3765        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3766          if ((pcre_ctypes[*eptr++] & ctype_space) != 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_space) != 0) return FALSE;
3767        break;        break;
3768    
3769        case OP_WHITESPACE:        case OP_WHITESPACE:
3770        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3771          if ((pcre_ctypes[*eptr++] & ctype_space) == 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_space) == 0) return FALSE;
3772        break;        break;
3773    
3774        case OP_NOT_WORDCHAR:        case OP_NOT_WORDCHAR:
3775        for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) != 0)        for (i = 1; i <= min; i++)
3776          return FALSE;          if ((md->ctypes[*eptr++] & ctype_word) != 0)
3777              return FALSE;
3778        break;        break;
3779    
3780        case OP_WORDCHAR:        case OP_WORDCHAR:
3781        for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) == 0)        for (i = 1; i <= min; i++)
3782          return FALSE;          if ((md->ctypes[*eptr++] & ctype_word) == 0)
3783              return FALSE;
3784        break;        break;
3785        }        }
3786    
# Line 3232  for (;;) Line 3789  for (;;)
3789      if (min == max) continue;      if (min == max) continue;
3790    
3791      /* If minimizing, we have to test the rest of the pattern before each      /* If minimizing, we have to test the rest of the pattern before each
3792      subsequent match, so inlining isn't much help; just use the function. */      subsequent match. */
3793    
3794      if (minimize)      if (minimize)
3795        {        {
3796        for (i = min;; i++)        for (i = min;; i++)
3797          {          {
3798          if (match(eptr, ecode, offset_top, md)) return TRUE;          if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE;
3799          if (i >= max || eptr >= md->end_subject ||          if (i >= max || eptr >= md->end_subject) return FALSE;
3800            !match_type(ctype, *eptr++, md->dotall))  
3801              return FALSE;          c = *eptr++;
3802            switch(ctype)
3803              {
3804              case OP_ANY:
3805              if ((ims & PCRE_DOTALL) == 0 && c == '\n') return FALSE;
3806              break;
3807    
3808              case OP_NOT_DIGIT:
3809              if ((md->ctypes[c] & ctype_digit) != 0) return FALSE;
3810              break;
3811    
3812              case OP_DIGIT:
3813              if ((md->ctypes[c] & ctype_digit) == 0) return FALSE;
3814              break;
3815    
3816              case OP_NOT_WHITESPACE:
3817              if ((md->ctypes[c] & ctype_space) != 0) return FALSE;
3818              break;
3819    
3820              case OP_WHITESPACE:
3821              if  ((md->ctypes[c] & ctype_space) == 0) return FALSE;
3822              break;
3823    
3824              case OP_NOT_WORDCHAR:
3825              if ((md->ctypes[c] & ctype_word) != 0) return FALSE;
3826              break;
3827    
3828              case OP_WORDCHAR:
3829              if ((md->ctypes[c] & ctype_word) == 0) return FALSE;
3830              break;
3831              }
3832          }          }
3833        /* Control never gets here */        /* Control never gets here */
3834        }        }
# Line 3255  for (;;) Line 3842  for (;;)
3842        switch(ctype)        switch(ctype)
3843          {          {
3844          case OP_ANY:          case OP_ANY:
3845          if (!md->dotall)          if ((ims & PCRE_DOTALL) == 0)
3846            {            {
3847            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3848              {              {
# Line 3274  for (;;) Line 3861  for (;;)
3861          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3862          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3863            {            {
3864            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
3865              break;              break;
3866            eptr++;            eptr++;
3867            }            }
# Line 3283  for (;;) Line 3870  for (;;)
3870          case OP_DIGIT:          case OP_DIGIT:
3871          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3872            {            {
3873            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
3874              break;              break;
3875            eptr++;            eptr++;
3876            }            }
# Line 3292  for (;;) Line 3879  for (;;)
3879          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3880          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3881            {            {
3882            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
3883              break;              break;
3884            eptr++;            eptr++;
3885            }            }
# Line 3301  for (;;) Line 3888  for (;;)
3888          case OP_WHITESPACE:          case OP_WHITESPACE:
3889          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3890            {            {
3891            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
3892              break;              break;
3893            eptr++;            eptr++;
3894            }            }
# Line 3310  for (;;) Line 3897  for (;;)
3897          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3898          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3899            {            {
3900            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
3901              break;              break;
3902            eptr++;            eptr++;
3903            }            }
# Line 3319  for (;;) Line 3906  for (;;)
3906          case OP_WORDCHAR:          case OP_WORDCHAR:
3907          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3908            {            {
3909            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
3910              break;              break;
3911            eptr++;            eptr++;
3912            }            }
# Line 3327  for (;;) Line 3914  for (;;)
3914          }          }
3915    
3916        while (eptr >= pp)        while (eptr >= pp)
3917          if (match(eptr--, ecode, offset_top, md)) return TRUE;          if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3918              return TRUE;
3919        return FALSE;        return FALSE;
3920        }        }
3921      /* Control never gets here */      /* Control never gets here */
# Line 3350  for (;;) Line 3938  for (;;)
3938    
3939    
3940    
 /*************************************************  
 *         Segregate setjmp()                     *  
 *************************************************/  
   
 /* The -Wall option of gcc gives warnings for all local variables when setjmp()  
 is used, even if the coding conforms to the rules of ANSI C. To avoid this, we  
 hide it in a separate function. This is called only when PCRE_EXTRA is set,  
 since it's needed only for the extension \X option, and with any luck, a good  
 compiler will spot the tail recursion and compile it efficiently.  
   
 Arguments:  
    eptr        pointer in subject  
    ecode       position in code  
    offset_top  current top pointer  
    md          pointer to "static" info for the match  
   
 Returns:       TRUE if matched  
 */  
   
 static BOOL  
 match_with_setjmp(const uschar *eptr, const uschar *ecode, int offset_top,  
   match_data *match_block)  
 {  
 return setjmp(match_block->fail_env) == 0 &&  
       match(eptr, ecode, offset_top, match_block);  
 }  
   
   
3941    
3942  /*************************************************  /*************************************************
3943  *         Execute a Regular Expression           *  *         Execute a Regular Expression           *
# Line 3408  pcre_exec(const pcre *external_re, const Line 3968  pcre_exec(const pcre *external_re, const
3968  {  {
3969  int resetcount, ocount;  int resetcount, ocount;
3970  int first_char = -1;  int first_char = -1;
3971    int ims = 0;
3972  match_data match_block;  match_data match_block;
3973  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
3974  const uschar *start_match = (const uschar *)subject;  const uschar *start_match = (const uschar *)subject;
# Line 3428  match_block.start_subject = (const uscha Line 3989  match_block.start_subject = (const uscha
3989  match_block.end_subject = match_block.start_subject + length;  match_block.end_subject = match_block.start_subject + length;
3990  end_subject = match_block.end_subject;  end_subject = match_block.end_subject;
3991    
3992  match_block.caseless  = ((re->options | options) & PCRE_CASELESS) != 0;  match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
 match_block.runtime_caseless = match_block.caseless &&  
   (re->options & PCRE_CASELESS) == 0;  
   
 match_block.multiline = ((re->options | options) & PCRE_MULTILINE) != 0;  
 match_block.dotall    = ((re->options | options) & PCRE_DOTALL) != 0;  
 match_block.endonly   = ((re->options | options) & PCRE_DOLLAR_ENDONLY) != 0;  
3993    
3994  match_block.notbol = (options & PCRE_NOTBOL) != 0;  match_block.notbol = (options & PCRE_NOTBOL) != 0;
3995  match_block.noteol = (options & PCRE_NOTEOL) != 0;  match_block.noteol = (options & PCRE_NOTEOL) != 0;
3996    
3997  match_block.errorcode = PCRE_ERROR_NOMATCH;     /* Default error */  match_block.errorcode = PCRE_ERROR_NOMATCH;     /* Default error */
3998    
3999    match_block.lcc = re->tables + lcc_offset;
4000    match_block.ctypes = re->tables + ctypes_offset;
4001    
4002    /* The ims options can vary during the matching as a result of the presence
4003    of (?ims) items in the pattern. They are kept in a local variable so that
4004    restoring at the exit of a group is easy. */
4005    
4006    ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
4007    
4008  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can
4009  hold, we get a temporary bit of working store to use during the matching.  hold, we get a temporary bit of working store to use during the matching.
4010  Otherwise, we can use the vector supplied, rounding down its size to a multiple  Otherwise, we can use the vector supplied, rounding down its size to a multiple
4011  of 2. */  of 3. */
4012    
4013  ocount = offsetcount & (-2);  ocount = offsetcount - (offsetcount % 3);
4014  if (re->top_backref > 0 && re->top_backref >= ocount/2)  
4015    if (re->top_backref > 0 && re->top_backref >= ocount/3)
4016    {    {
4017    ocount = re->top_backref * 2 + 2;    ocount = re->top_backref * 3 + 3;
4018    match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
4019    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
4020    using_temporary_offsets = TRUE;    using_temporary_offsets = TRUE;
4021    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
# Line 3458  if (re->top_backref > 0 && re->top_backr Line 4023  if (re->top_backref > 0 && re->top_backr
4023  else match_block.offset_vector = offsets;  else match_block.offset_vector = offsets;
4024    
4025  match_block.offset_end = ocount;  match_block.offset_end = ocount;
4026    match_block.offset_max = (2*ocount)/3;
4027  match_block.offset_overflow = FALSE;  match_block.offset_overflow = FALSE;
4028    
4029  /* Compute the minimum number of offsets that we need to reset each time. Doing  /* Compute the minimum number of offsets that we need to reset each time. Doing
# Line 3467  in the pattern. */ Line 4033  in the pattern. */
4033  resetcount = 2 + re->top_bracket * 2;  resetcount = 2 + re->top_bracket * 2;
4034  if (resetcount > offsetcount) resetcount = ocount;  if (resetcount > offsetcount) resetcount = ocount;
4035    
4036  /* If MULTILINE is set at exec time but was not set at compile time, and the  /* Reset the working variable associated with each extraction. These should
4037  anchored flag is set, we must re-check because a setting provoked by ^ in the  never be used unless previously set, but they get saved and restored, and so we
4038  pattern is not right in multi-line mode. Calling is_anchored() again here does  initialize them to avoid reading uninitialized locations. */
 the right check, because multiline is now set. If it now yields FALSE, the  
 expression must have had ^ starting some of its branches. Check to see if  
 that is true for *all* branches, and if so, set the startline flag. */  
4039    
4040  if (match_block. multiline && anchored && (re->options & PCRE_MULTILINE) == 0 &&  if (match_block.offset_vector != NULL)
     !is_anchored(re->code, match_block.multiline))  
4041    {    {
4042    anchored = FALSE;    register int *iptr = match_block.offset_vector + ocount;
4043    if (is_startline(re->code)) startline = TRUE;    register int *iend = iptr - resetcount/2 + 1;
4044      while (--iptr >= iend) *iptr = -1;
4045    }    }
4046    
4047  /* Set up the first character to match, if available. The first_char value is  /* Set up the first character to match, if available. The first_char value is
4048  never set for an anchored regular expression, but the anchoring may be forced  never set for an anchored regular expression, but the anchoring may be forced
4049  at run time, so we have to test for anchoring. The first char may be unset for  at run time, so we have to test for anchoring. The first char may be unset for
4050  an unanchored pattern, of course. If there's no first char and the pattern was  an unanchored pattern, of course. If there's no first char and the pattern was
4051  studied, the may be a bitmap of possible first characters. However, we can  studied, there may be a bitmap of possible first characters. */
 use this only if the caseless state of the studying was correct. */  
4052    
4053  if (!anchored)  if (!anchored)
4054    {    {
4055    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->options & PCRE_FIRSTSET) != 0)
4056      {      {
4057      first_char = re->first_char;      first_char = re->first_char;
4058      if (match_block.caseless) first_char = pcre_lcc[first_char];      if ((ims & PCRE_CASELESS) != 0) first_char = match_block.lcc[first_char];
4059      }      }
4060    else    else
4061      if (!startline && extra != NULL &&      if (!startline && extra != NULL &&
4062        (extra->options & PCRE_STUDY_MAPPED) != 0 &&        (extra->options & PCRE_STUDY_MAPPED) != 0)
       ((extra->options & PCRE_STUDY_CASELESS) != 0) == match_block.caseless)  
4063          start_bits = extra->start_bits;          start_bits = extra->start_bits;
4064    }    }
4065    
# Line 3518  do Line 4079  do
4079    
4080    if (first_char >= 0)    if (first_char >= 0)
4081      {      {
4082      if (match_block.caseless)      if ((ims & PCRE_CASELESS) != 0)
4083        while (start_match < end_subject && pcre_lcc[*start_match] != first_char)        while (start_match < end_subject &&
4084                 match_block.lcc[*start_match] != first_char)
4085          start_match++;          start_match++;
4086      else      else
4087        while (start_match < end_subject && *start_match != first_char)        while (start_match < end_subject && *start_match != first_char)
# Line 3559  do Line 4121  do
4121    there were too many extractions, set the return code to zero. In the case    there were too many extractions, set the return code to zero. In the case
4122    where we had to get some local store to hold offsets for backreferences, copy    where we had to get some local store to hold offsets for backreferences, copy
4123    those back references that we can. In this case there need not be overflow    those back references that we can. In this case there need not be overflow
4124    if certain parts of the pattern were not used.    if certain parts of the pattern were not used. */
   
   Before starting the match, we have to set up a longjmp() target to enable  
   the "cut" operation to fail a match completely without backtracking. This  
   is done in a separate function to avoid compiler warnings. We need not do  
   it unless PCRE_EXTRA is set, since only in that case is the "cut" operation  
   enabled. */  
4125    
4126    if ((re->options & PCRE_EXTRA) != 0)    if (!match(start_match, re->code, 2, &match_block, ims, FALSE, start_match))
4127      {      continue;
     if (!match_with_setjmp(start_match, re->code, 2, &match_block))  
       continue;  
     }  
   else if (!match(start_match, re->code, 2, &match_block)) continue;  
4128    
4129    /* Copy the offset information from temporary store if necessary */    /* Copy the offset information from temporary store if necessary */
4130    
# Line 3602  do Line 4154  do
4154    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
4155    return rc;    return rc;
4156    }    }
4157    
4158    /* This "while" is the end of the "do" above */
4159    
4160  while (!anchored &&  while (!anchored &&
4161         match_block.errorcode == PCRE_ERROR_NOMATCH &&         match_block.errorcode == PCRE_ERROR_NOMATCH &&
4162         start_match++ < end_subject);         start_match++ < end_subject);

Legend:
Removed from v.13  
changed lines
  Added in v.27

  ViewVC Help
Powered by ViewVC 1.1.5