/[pcre]/code/tags/pcre-3.2/pcre.c
ViewVC logotype

Diff of /code/tags/pcre-3.2/pcre.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 23 by nigel, Sat Feb 24 21:38:41 2007 UTC revision 47 by nigel, Sat Feb 24 21:39:29 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1998 University of Cambridge             Copyright (c) 1997-2000 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 25  restrictions: Line 25  restrictions:
25    
26  3. Altered versions must be plainly marked as such, and must not be  3. Altered versions must be plainly marked as such, and must not be
27     misrepresented as being the original software.     misrepresented as being the original software.
28    
29    4. If PCRE is embedded in any software that is released under the GNU
30       General Purpose Licence (GPL), then the terms of that licence shall
31       supersede any condition above with which it is incompatible.
32  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
33  */  */
34    
# Line 78  static const char *OP_names[] = { Line 82  static const char *OP_names[] = {
82    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
83    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
84    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
85    "class", "Ref",    "class", "Ref", "Recurse",
86    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
87    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
88    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
# Line 103  static const short int escapes[] = { Line 107  static const short int escapes[] = {
107      0,      0, -ESC_z                                            /* x - z */      0,      0, -ESC_z                                            /* x - z */
108  };  };
109    
110    /* Tables of names of POSIX character classes and their lengths. The list is
111    terminated by a zero length entry. The first three must be alpha, lower, upper,
112    as this is assumed for handling case independence. */
113    
114    static const char *posix_names[] = {
115      "alpha", "lower", "upper",
116      "alnum", "ascii", "cntrl", "digit", "graph",
117      "print", "punct", "space", "word",  "xdigit" };
118    
119    static const uschar posix_name_lengths[] = {
120      5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
121    
122    /* Table of class bit maps for each POSIX class; up to three may be combined
123    to form the class. */
124    
125    static const int posix_class_maps[] = {
126      cbit_lower, cbit_upper, -1,             /* alpha */
127      cbit_lower, -1,         -1,             /* lower */
128      cbit_upper, -1,         -1,             /* upper */
129      cbit_digit, cbit_lower, cbit_upper,     /* alnum */
130      cbit_print, cbit_cntrl, -1,             /* ascii */
131      cbit_cntrl, -1,         -1,             /* cntrl */
132      cbit_digit, -1,         -1,             /* digit */
133      cbit_graph, -1,         -1,             /* graph */
134      cbit_print, -1,         -1,             /* print */
135      cbit_punct, -1,         -1,             /* punct */
136      cbit_space, -1,         -1,             /* space */
137      cbit_word,  -1,         -1,             /* word */
138      cbit_xdigit,-1,         -1              /* xdigit */
139    };
140    
141    
142  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
143    
144  static BOOL  static BOOL
145    compile_regex(int, int, int *, uschar **, const uschar **, const char **,    compile_regex(int, int, int *, uschar **, const uschar **, const char **,
146      BOOL, int);      BOOL, int, int *, int *, compile_data *);
147    
148    /* Structure for building a chain of data that actually lives on the
149    stack, for holding the values of the subject pointer at the start of each
150    subpattern, so as to detect when an empty string has been matched by a
151    subpattern - to break infinite loops. */
152    
153    typedef struct eptrblock {
154      struct eptrblock *prev;
155      const uschar *saved_eptr;
156    } eptrblock;
157    
158  /* Structure for passing "static" information around between the functions  /* Flag bits for the match() function */
 doing the matching, so that they are thread-safe. */  
159    
160  typedef struct match_data {  #define match_condassert   0x01    /* Called to check a condition assertion */
161    int    errorcode;             /* As it says */  #define match_isgroup      0x02    /* Set if start of bracketed group */
   int   *offset_vector;         /* Offset vector */  
   int    offset_end;            /* One past the end */  
   int    offset_max;            /* The maximum usable for return data */  
   BOOL   offset_overflow;       /* Set if too many extractions */  
   BOOL   notbol;                /* NOTBOL flag */  
   BOOL   noteol;                /* NOTEOL flag */  
   BOOL   endonly;               /* Dollar not before final \n */  
   const uschar *start_subject;  /* Start of the subject string */  
   const uschar *end_subject;    /* End of the subject string */  
   const uschar *end_match_ptr;  /* Subject position at end match */  
   int     end_offset_top;       /* Highwater mark at end of match */  
 } match_data;  
162    
163    
164    
# Line 145  void  (*pcre_free)(void *) = free; Line 178  void  (*pcre_free)(void *) = free;
178    
179    
180  /*************************************************  /*************************************************
181    *             Default character tables           *
182    *************************************************/
183    
184    /* A default set of character tables is included in the PCRE binary. Its source
185    is built by the maketables auxiliary program, which uses the default C ctypes
186    functions, and put in the file chartables.c. These tables are used by PCRE
187    whenever the caller of pcre_compile() does not provide an alternate set of
188    tables. */
189    
190    #include "chartables.c"
191    
192    
193    
194    /*************************************************
195  *          Return version string                 *  *          Return version string                 *
196  *************************************************/  *************************************************/
197    
198    #define STRING(a)  # a
199    #define XSTRING(s) STRING(s)
200    
201  const char *  const char *
202  pcre_version(void)  pcre_version(void)
203  {  {
204  return PCRE_VERSION;  return XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE);
205  }  }
206    
207    
208    
209    
210  /*************************************************  /*************************************************
211  *       Return info about a compiled pattern     *  * (Obsolete) Return info about compiled pattern  *
212  *************************************************/  *************************************************/
213    
214  /* This function picks potentially useful data out of the private  /* This is the original "info" function. It picks potentially useful data out
215  structure.  of the private structure, but its interface was too rigid. It remains for
216    backwards compatibility. The public options are passed back in an int - though
217    the re->options field has been expanded to a long int, all the public options
218    are at the low end of it, and so even on 16-bit systems this will still be OK.
219    Therefore, I haven't changed the API for pcre_info().
220    
221  Arguments:  Arguments:
222    external_re   points to compiled code    external_re   points to compiled code
# Line 171  Arguments: Line 225  Arguments:
225                  or -1 if multiline and all branches start ^,                  or -1 if multiline and all branches start ^,
226                  or -2 otherwise                  or -2 otherwise
227    
228  Returns:        number of identifying extraction brackets  Returns:        number of capturing subpatterns
229                  or negative values on error                  or negative values on error
230  */  */
231    
# Line 181  pcre_info(const pcre *external_re, int * Line 235  pcre_info(const pcre *external_re, int *
235  const real_pcre *re = (const real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
236  if (re == NULL) return PCRE_ERROR_NULL;  if (re == NULL) return PCRE_ERROR_NULL;
237  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
238  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);  if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);
239  if (first_char != NULL)  if (first_char != NULL)
240    *first_char = ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :    *first_char = ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :
241       ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;       ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
# Line 190  return re->top_bracket; Line 244  return re->top_bracket;
244    
245    
246    
247    /*************************************************
248    *        Return info about compiled pattern      *
249    *************************************************/
250    
251    /* This is a newer "info" function which has an extensible interface so
252    that additional items can be added compatibly.
253    
254    Arguments:
255      external_re      points to compiled code
256      external_study   points to study data, or NULL
257      what             what information is required
258      where            where to put the information
259    
260    Returns:           0 if data returned, negative on error
261    */
262    
263    int
264    pcre_fullinfo(const pcre *external_re, const pcre_extra *study_data, int what,
265      void *where)
266    {
267    const real_pcre *re = (const real_pcre *)external_re;
268    const real_pcre_extra *study = (const real_pcre_extra *)study_data;
269    
270    if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
271    if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
272    
273    switch (what)
274      {
275      case PCRE_INFO_OPTIONS:
276      *((unsigned long int *)where) = re->options & PUBLIC_OPTIONS;
277      break;
278    
279      case PCRE_INFO_SIZE:
280      *((size_t *)where) = re->size;
281      break;
282    
283      case PCRE_INFO_CAPTURECOUNT:
284      *((int *)where) = re->top_bracket;
285      break;
286    
287      case PCRE_INFO_BACKREFMAX:
288      *((int *)where) = re->top_backref;
289      break;
290    
291      case PCRE_INFO_FIRSTCHAR:
292      *((int *)where) =
293        ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :
294        ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
295      break;
296    
297      case PCRE_INFO_FIRSTTABLE:
298      *((const uschar **)where) =
299        (study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)?
300          study->start_bits : NULL;
301      break;
302    
303      case PCRE_INFO_LASTLITERAL:
304      *((int *)where) =
305        ((re->options & PCRE_REQCHSET) != 0)? re->req_char : -1;
306      break;
307    
308      default: return PCRE_ERROR_BADOPTION;
309      }
310    
311    return 0;
312    }
313    
314    
315    
316  #ifdef DEBUG  #ifdef DEBUG
317  /*************************************************  /*************************************************
# Line 237  Arguments: Line 359  Arguments:
359    bracount   number of previous extracting brackets    bracount   number of previous extracting brackets
360    options    the options bits    options    the options bits
361    isclass    TRUE if inside a character class    isclass    TRUE if inside a character class
362      cd         pointer to char tables block
363    
364  Returns:     zero or positive => a data character  Returns:     zero or positive => a data character
365               negative => a special escape sequence               negative => a special escape sequence
# Line 245  Returns:     zero or positive => a data Line 368  Returns:     zero or positive => a data
368    
369  static int  static int
370  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
371    int options, BOOL isclass)    int options, BOOL isclass, compile_data *cd)
372  {  {
373  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
374  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */  int c, i;
 int i;  
375    
376    c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */
377  if (c == 0) *errorptr = ERR1;  if (c == 0) *errorptr = ERR1;
378    
379  /* Digits or letters may have special meaning; all others are literals. */  /* Digits or letters may have special meaning; all others are literals. */
# Line 288  else Line 411  else
411        {        {
412        oldptr = ptr;        oldptr = ptr;
413        c -= '0';        c -= '0';
414        while ((pcre_ctypes[ptr[1]] & ctype_digit) != 0)        while ((cd->ctypes[ptr[1]] & ctype_digit) != 0)
415          c = c * 10 + *(++ptr) - '0';          c = c * 10 + *(++ptr) - '0';
416        if (c < 10 || c <= bracount)        if (c < 10 || c <= bracount)
417          {          {
# Line 314  else Line 437  else
437    
438      case '0':      case '0':
439      c -= '0';      c -= '0';
440      while(i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_digit) != 0 &&      while(i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 &&
441        ptr[1] != '8' && ptr[1] != '9')        ptr[1] != '8' && ptr[1] != '9')
442          c = c * 8 + *(++ptr) - '0';          c = c * 8 + *(++ptr) - '0';
443      break;      break;
# Line 323  else Line 446  else
446    
447      case 'x':      case 'x':
448      c = 0;      c = 0;
449      while (i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_xdigit) != 0)      while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0)
450        {        {
451        ptr++;        ptr++;
452        c = c * 16 + pcre_lcc[*ptr] -        c = c * 16 + cd->lcc[*ptr] -
453          (((pcre_ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');          (((cd->ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');
454        }        }
455      break;      break;
456    
# Line 341  else Line 464  else
464    
465      /* A letter is upper-cased; then the 0x40 bit is flipped */      /* A letter is upper-cased; then the 0x40 bit is flipped */
466    
467      if (c >= 'a' && c <= 'z') c = pcre_fcc[c];      if (c >= 'a' && c <= 'z') c = cd->fcc[c];
468      c ^= 0x40;      c ^= 0x40;
469      break;      break;
470    
471      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
472      other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,      other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,
473      for Perl compatibility, it is a literal. */      for Perl compatibility, it is a literal. This code looks a bit odd, but
474        there used to be some cases other than the default, and there may be again
475        in future, so I haven't "optimized" it. */
476    
477      default:      default:
478      if ((options & PCRE_EXTRA) != 0) switch(c)      if ((options & PCRE_EXTRA) != 0) switch(c)
# Line 377  where the ddds are digits. Line 502  where the ddds are digits.
502    
503  Arguments:  Arguments:
504    p         pointer to the first char after '{'    p         pointer to the first char after '{'
505      cd        pointer to char tables block
506    
507  Returns:    TRUE or FALSE  Returns:    TRUE or FALSE
508  */  */
509    
510  static BOOL  static BOOL
511  is_counted_repeat(const uschar *p)  is_counted_repeat(const uschar *p, compile_data *cd)
512  {  {
513  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
514  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
515  if (*p == '}') return TRUE;  if (*p == '}') return TRUE;
516    
517  if (*p++ != ',') return FALSE;  if (*p++ != ',') return FALSE;
518  if (*p == '}') return TRUE;  if (*p == '}') return TRUE;
519    
520  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
521  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
522  return (*p == '}');  return (*p == '}');
523  }  }
524    
# Line 412  Arguments: Line 538  Arguments:
538    maxp       pointer to int for max    maxp       pointer to int for max
539               returned as -1 if no max               returned as -1 if no max
540    errorptr   points to pointer to error message    errorptr   points to pointer to error message
541      cd         pointer to character tables block
542    
543  Returns:     pointer to '}' on success;  Returns:     pointer to '}' on success;
544               current ptr on error, with errorptr set               current ptr on error, with errorptr set
545  */  */
546    
547  static const uschar *  static const uschar *
548  read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr)  read_repeat_counts(const uschar *p, int *minp, int *maxp,
549      const char **errorptr, compile_data *cd)
550  {  {
551  int min = 0;  int min = 0;
552  int max = -1;  int max = -1;
553    
554  while ((pcre_ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';  while ((cd->ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';
555    
556  if (*p == '}') max = min; else  if (*p == '}') max = min; else
557    {    {
558    if (*(++p) != '}')    if (*(++p) != '}')
559      {      {
560      max = 0;      max = 0;
561      while((pcre_ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';      while((cd->ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';
562      if (max < min)      if (max < min)
563        {        {
564        *errorptr = ERR4;        *errorptr = ERR4;
# Line 526  for (;;) Line 654  for (;;)
654    
655      case OP_REVERSE:      case OP_REVERSE:
656      cc++;      cc++;
657        /* Fall through */
658    
659      case OP_CREF:      case OP_CREF:
660      case OP_OPT:      case OP_OPT:
# Line 609  for (;;) Line 738  for (;;)
738    
739    
740  /*************************************************  /*************************************************
741    *           Check for POSIX class syntax         *
742    *************************************************/
743    
744    /* This function is called when the sequence "[:" or "[." or "[=" is
745    encountered in a character class. It checks whether this is followed by an
746    optional ^ and then a sequence of letters, terminated by a matching ":]" or
747    ".]" or "=]".
748    
749    Arguments:
750      ptr      pointer to the initial [
751      endptr   where to return the end pointer
752      cd       pointer to compile data
753    
754    Returns:   TRUE or FALSE
755    */
756    
757    static BOOL
758    check_posix_syntax(const uschar *ptr, const uschar **endptr, compile_data *cd)
759    {
760    int terminator;          /* Don't combine these lines; the Solaris cc */
761    terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
762    if (*(++ptr) == '^') ptr++;
763    while ((cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;
764    if (*ptr == terminator && ptr[1] == ']')
765      {
766      *endptr = ptr;
767      return TRUE;
768      }
769    return FALSE;
770    }
771    
772    
773    
774    
775    /*************************************************
776    *          Check POSIX class name                *
777    *************************************************/
778    
779    /* This function is called to check the name given in a POSIX-style class entry
780    such as [:alnum:].
781    
782    Arguments:
783      ptr        points to the first letter
784      len        the length of the name
785    
786    Returns:     a value representing the name, or -1 if unknown
787    */
788    
789    static int
790    check_posix_name(const uschar *ptr, int len)
791    {
792    register int yield = 0;
793    while (posix_name_lengths[yield] != 0)
794      {
795      if (len == posix_name_lengths[yield] &&
796        strncmp((const char *)ptr, posix_names[yield], len) == 0) return yield;
797      yield++;
798      }
799    return -1;
800    }
801    
802    
803    
804    
805    /*************************************************
806  *           Compile one branch                   *  *           Compile one branch                   *
807  *************************************************/  *************************************************/
808    
809  /* Scan the pattern, compiling it into the code vector.  /* Scan the pattern, compiling it into the code vector.
810    
811  Arguments:  Arguments:
812    options     the option bits    options      the option bits
813    brackets    points to number of brackets used    brackets     points to number of brackets used
814    code        points to the pointer to the current code point    code         points to the pointer to the current code point
815    ptrptr      points to the current pattern pointer    ptrptr       points to the current pattern pointer
816    errorptr    points to pointer to error message    errorptr     points to pointer to error message
817    optchanged  set to the value of the last OP_OPT item compiled    optchanged   set to the value of the last OP_OPT item compiled
818      reqchar      set to the last literal character required, else -1
819      countlits    set to count of mandatory literal characters
820      cd           contains pointers to tables
821    
822  Returns:      TRUE on success  Returns:       TRUE on success
823                FALSE, with *errorptr set on error                 FALSE, with *errorptr set on error
824  */  */
825    
826  static BOOL  static BOOL
827  compile_branch(int options, int *brackets, uschar **codeptr,  compile_branch(int options, int *brackets, uschar **codeptr,
828    const uschar **ptrptr, const char **errorptr, int *optchanged)    const uschar **ptrptr, const char **errorptr, int *optchanged,
829      int *reqchar, int *countlits, compile_data *cd)
830  {  {
831  int repeat_type, op_type;  int repeat_type, op_type;
832  int repeat_min, repeat_max;  int repeat_min, repeat_max;
833  int bravalue, length;  int bravalue, length;
834  int greedy_default, greedy_non_default;  int greedy_default, greedy_non_default;
835    int prevreqchar;
836    int condcount = 0;
837    int subcountlits = 0;
838  register int c;  register int c;
839  register uschar *code = *codeptr;  register uschar *code = *codeptr;
840  uschar *tempcode;  uschar *tempcode;
# Line 647  uschar class[32]; Line 848  uschar class[32];
848  greedy_default = ((options & PCRE_UNGREEDY) != 0);  greedy_default = ((options & PCRE_UNGREEDY) != 0);
849  greedy_non_default = greedy_default ^ 1;  greedy_non_default = greedy_default ^ 1;
850    
851    /* Initialize no required char, and count of literals */
852    
853    *reqchar = prevreqchar = -1;
854    *countlits = 0;
855    
856  /* Switch on next character until the end of the branch */  /* Switch on next character until the end of the branch */
857    
858  for (;; ptr++)  for (;; ptr++)
# Line 656  for (;; ptr++) Line 862  for (;; ptr++)
862    int class_lastchar;    int class_lastchar;
863    int newoptions;    int newoptions;
864    int condref;    int condref;
865      int subreqchar;
866    
867    c = *ptr;    c = *ptr;
868    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
869      {      {
870      if ((pcre_ctypes[c] & ctype_space) != 0) continue;      if ((cd->ctypes[c] & ctype_space) != 0) continue;
871      if (c == '#')      if (c == '#')
872        {        {
873        while ((c = *(++ptr)) != 0 && c != '\n');        /* The space before the ; is to avoid a warning on a silly compiler
874          on the Macintosh. */
875          while ((c = *(++ptr)) != 0 && c != '\n') ;
876        continue;        continue;
877        }        }
878      }      }
# Line 738  for (;; ptr++) Line 947  for (;; ptr++)
947          goto FAILED;          goto FAILED;
948          }          }
949    
950          /* Handle POSIX class names. Perl allows a negation extension of the
951          form [:^name]. A square bracket that doesn't match the syntax is
952          treated as a literal. We also recognize the POSIX constructions
953          [.ch.] and [=ch=] ("collating elements") and fault them, as Perl
954          5.6 does. */
955    
956          if (c == '[' &&
957              (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
958              check_posix_syntax(ptr, &tempptr, cd))
959            {
960            BOOL local_negate = FALSE;
961            int posix_class, i;
962            register const uschar *cbits = cd->cbits;
963    
964            if (ptr[1] != ':')
965              {
966              *errorptr = ERR31;
967              goto FAILED;
968              }
969    
970            ptr += 2;
971            if (*ptr == '^')
972              {
973              local_negate = TRUE;
974              ptr++;
975              }
976    
977            posix_class = check_posix_name(ptr, tempptr - ptr);
978            if (posix_class < 0)
979              {
980              *errorptr = ERR30;
981              goto FAILED;
982              }
983    
984            /* If matching is caseless, upper and lower are converted to
985            alpha. This relies on the fact that the class table starts with
986            alpha, lower, upper as the first 3 entries. */
987    
988            if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)
989              posix_class = 0;
990    
991            /* Or into the map we are building up to 3 of the static class
992            tables, or their negations. */
993    
994            posix_class *= 3;
995            for (i = 0; i < 3; i++)
996              {
997              int taboffset = posix_class_maps[posix_class + i];
998              if (taboffset < 0) break;
999              if (local_negate)
1000                for (c = 0; c < 32; c++) class[c] |= ~cbits[c+taboffset];
1001              else
1002                for (c = 0; c < 32; c++) class[c] |= cbits[c+taboffset];
1003              }
1004    
1005            ptr = tempptr + 1;
1006            class_charcount = 10;  /* Set > 1; assumes more than 1 per class */
1007            continue;
1008            }
1009    
1010        /* Backslash may introduce a single character, or it may introduce one        /* Backslash may introduce a single character, or it may introduce one
1011        of the specials, which just set a flag. Escaped items are checked for        of the specials, which just set a flag. Escaped items are checked for
1012        validity in the pre-compiling pass. The sequence \b is a special case.        validity in the pre-compiling pass. The sequence \b is a special case.
# Line 748  for (;; ptr++) Line 1017  for (;; ptr++)
1017    
1018        if (c == '\\')        if (c == '\\')
1019          {          {
1020          c = check_escape(&ptr, errorptr, *brackets, options, TRUE);          c = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
1021          if (-c == ESC_b) c = '\b';          if (-c == ESC_b) c = '\b';
1022          else if (c < 0)          else if (c < 0)
1023            {            {
1024              register const uschar *cbits = cd->cbits;
1025            class_charcount = 10;            class_charcount = 10;
1026            switch (-c)            switch (-c)
1027              {              {
1028              case ESC_d:              case ESC_d:
1029              for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_digit];              for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_digit];
1030              continue;              continue;
1031    
1032              case ESC_D:              case ESC_D:
1033              for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_digit];              for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_digit];
1034              continue;              continue;
1035    
1036              case ESC_w:              case ESC_w:
1037              for (c = 0; c < 32; c++)              for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_word];
               class[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]);  
1038              continue;              continue;
1039    
1040              case ESC_W:              case ESC_W:
1041              for (c = 0; c < 32; c++)              for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_word];
               class[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]);  
1042              continue;              continue;
1043    
1044              case ESC_s:              case ESC_s:
1045              for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_space];              for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_space];
1046              continue;              continue;
1047    
1048              case ESC_S:              case ESC_S:
1049              for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_space];              for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_space];
1050              continue;              continue;
1051    
1052              default:              default:
# Line 810  for (;; ptr++) Line 1078  for (;; ptr++)
1078    
1079          if (d == '\\')          if (d == '\\')
1080            {            {
1081            d = check_escape(&ptr, errorptr, *brackets, options, TRUE);            d = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
1082            if (d < 0)            if (d < 0)
1083              {              {
1084              if (d == -ESC_b) d = '\b'; else              if (d == -ESC_b) d = '\b'; else
# Line 832  for (;; ptr++) Line 1100  for (;; ptr++)
1100            class[c/8] |= (1 << (c&7));            class[c/8] |= (1 << (c&7));
1101            if ((options & PCRE_CASELESS) != 0)            if ((options & PCRE_CASELESS) != 0)
1102              {              {
1103              int uc = pcre_fcc[c];           /* flip case */              int uc = cd->fcc[c];           /* flip case */
1104              class[uc/8] |= (1 << (uc&7));              class[uc/8] |= (1 << (uc&7));
1105              }              }
1106            class_charcount++;                /* in case a one-char range */            class_charcount++;                /* in case a one-char range */
# Line 847  for (;; ptr++) Line 1115  for (;; ptr++)
1115        class [c/8] |= (1 << (c&7));        class [c/8] |= (1 << (c&7));
1116        if ((options & PCRE_CASELESS) != 0)        if ((options & PCRE_CASELESS) != 0)
1117          {          {
1118          c = pcre_fcc[c];   /* flip case */          c = cd->fcc[c];   /* flip case */
1119          class[c/8] |= (1 << (c&7));          class[c/8] |= (1 << (c&7));
1120          }          }
1121        class_charcount++;        class_charcount++;
# Line 894  for (;; ptr++) Line 1162  for (;; ptr++)
1162      /* Various kinds of repeat */      /* Various kinds of repeat */
1163    
1164      case '{':      case '{':
1165      if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;      if (!is_counted_repeat(ptr+1, cd)) goto NORMAL_CHAR;
1166      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr);      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr, cd);
1167      if (*errorptr != NULL) goto FAILED;      if (*errorptr != NULL) goto FAILED;
1168      goto REPEAT;      goto REPEAT;
1169    
# Line 928  for (;; ptr++) Line 1196  for (;; ptr++)
1196        { repeat_type = greedy_non_default; ptr++; }        { repeat_type = greedy_non_default; ptr++; }
1197      else repeat_type = greedy_default;      else repeat_type = greedy_default;
1198    
     /* If the maximum is zero then the minimum must also be zero; Perl allows  
     this case, so we do too - by simply omitting the item altogether. */  
   
     if (repeat_max == 0) code = previous;  
   
1199      /* If previous was a string of characters, chop off the last one and use it      /* If previous was a string of characters, chop off the last one and use it
1200      as the subject of the repeat. If there was only one character, we can      as the subject of the repeat. If there was only one character, we can
1201      abolish the previous item altogether. */      abolish the previous item altogether. A repeat with a zero minimum wipes
1202        out any reqchar setting, backing up to the previous value. We must also
1203        adjust the countlits value. */
1204    
1205      else if (*previous == OP_CHARS)      if (*previous == OP_CHARS)
1206        {        {
1207        int len = previous[1];        int len = previous[1];
1208    
1209          if (repeat_min == 0) *reqchar = prevreqchar;
1210          *countlits += repeat_min - 1;
1211    
1212        if (len == 1)        if (len == 1)
1213          {          {
1214          c = previous[2];          c = previous[2];
# Line 978  for (;; ptr++) Line 1247  for (;; ptr++)
1247        code = previous;        code = previous;
1248    
1249        OUTPUT_SINGLE_REPEAT:        OUTPUT_SINGLE_REPEAT:
1250        repeat_type += op_type;      /* Combine both values for many cases */  
1251          /* If the maximum is zero then the minimum must also be zero; Perl allows
1252          this case, so we do too - by simply omitting the item altogether. */
1253    
1254          if (repeat_max == 0) goto END_REPEAT;
1255    
1256          /* Combine the op_type with the repeat_type */
1257    
1258          repeat_type += op_type;
1259    
1260        /* A minimum of zero is handled either as the special case * or ?, or as        /* A minimum of zero is handled either as the special case * or ?, or as
1261        an UPTO, with the maximum given. */        an UPTO, with the maximum given. */
# Line 1055  for (;; ptr++) Line 1332  for (;; ptr++)
1332        }        }
1333    
1334      /* If previous was a character class or a back reference, we put the repeat      /* If previous was a character class or a back reference, we put the repeat
1335      stuff after it. */      stuff after it, but just skip the item if the repeat was {0,0}. */
1336    
1337      else if (*previous == OP_CLASS || *previous == OP_REF)      else if (*previous == OP_CLASS || *previous == OP_REF)
1338        {        {
1339          if (repeat_max == 0)
1340            {
1341            code = previous;
1342            goto END_REPEAT;
1343            }
1344        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
1345          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
1346        else if (repeat_min == 1 && repeat_max == -1)        else if (repeat_min == 1 && repeat_max == -1)
# Line 1082  for (;; ptr++) Line 1364  for (;; ptr++)
1364      else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE ||      else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE ||
1365               (int)*previous == OP_COND)               (int)*previous == OP_COND)
1366        {        {
1367        int i, ketoffset = 0;        register int i;
1368          int ketoffset = 0;
1369        int len = code - previous;        int len = code - previous;
1370          uschar *bralink = NULL;
1371    
1372        /* If the maximum repeat count is unlimited, find the end of the bracket        /* If the maximum repeat count is unlimited, find the end of the bracket
1373        by scanning through from the start, and compute the offset back to it        by scanning through from the start, and compute the offset back to it
# Line 1098  for (;; ptr++) Line 1382  for (;; ptr++)
1382          ketoffset = code - ket;          ketoffset = code - ket;
1383          }          }
1384    
1385        /* If the minimum is greater than zero, and the maximum is unlimited or        /* The case of a zero minimum is special because of the need to stick
1386        equal to the minimum, the first copy remains where it is, and is        OP_BRAZERO in front of it, and because the group appears once in the
1387        replicated up to the minimum number of times. This case includes the +        data, whereas in other cases it appears the minimum number of times. For
1388        repeat, but of course no replication is needed in that case. */        this reason, it is simplest to treat this case separately, as otherwise
1389          the code gets far too messy. There are several special subcases when the
1390          minimum is zero. */
1391    
1392        if (repeat_min > 0 && (repeat_max == -1 || repeat_max == repeat_min))        if (repeat_min == 0)
1393          {          {
1394          for (i = 1; i < repeat_min; i++)          /* If we set up a required char from the bracket, we must back off
1395            to the previous value and reset the countlits value too. */
1396    
1397            if (subcountlits > 0)
1398            {            {
1399            memcpy(code, previous, len);            *reqchar = prevreqchar;
1400            code += len;            *countlits -= subcountlits;
1401            }            }
         }  
1402    
1403        /* If the minimum is zero, stick BRAZERO in front of the first copy.          /* If the maximum is also zero, we just omit the group from the output
1404        Then, if there is a fixed upper limit, replicated up to that many times,          altogether. */
       sticking BRAZERO in front of all the optional ones. */  
1405    
1406        else          if (repeat_max == 0)
1407          {            {
1408          if (repeat_min == 0)            code = previous;
1409              goto END_REPEAT;
1410              }
1411    
1412            /* If the maximum is 1 or unlimited, we just have to stick in the
1413            BRAZERO and do no more at this point. */
1414    
1415            if (repeat_max <= 1)
1416            {            {
1417            memmove(previous+1, previous, len);            memmove(previous+1, previous, len);
1418            code++;            code++;
1419            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
1420            }            }
1421    
1422            /* If the maximum is greater than 1 and limited, we have to replicate
1423            in a nested fashion, sticking OP_BRAZERO before each set of brackets.
1424            The first one has to be handled carefully because it's the original
1425            copy, which has to be moved up. The remainder can be handled by code
1426            that is common with the non-zero minimum case below. We just have to
1427            adjust the value of repeat_max, since one less copy is required. */
1428    
1429            else
1430              {
1431              int offset;
1432              memmove(previous+4, previous, len);
1433              code += 4;
1434              *previous++ = OP_BRAZERO + repeat_type;
1435              *previous++ = OP_BRA;
1436    
1437              /* We chain together the bracket offset fields that have to be
1438              filled in later when the ends of the brackets are reached. */
1439    
1440              offset = (bralink == NULL)? 0 : previous - bralink;
1441              bralink = previous;
1442              *previous++ = offset >> 8;
1443              *previous++ = offset & 255;
1444              }
1445    
1446            repeat_max--;
1447            }
1448    
1449          /* If the minimum is greater than zero, replicate the group as many
1450          times as necessary, and adjust the maximum to the number of subsequent
1451          copies that we need. */
1452    
1453          else
1454            {
1455          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1456            {            {
1457            memcpy(code, previous, len);            memcpy(code, previous, len);
1458            code += len;            code += len;
1459            }            }
1460            if (repeat_max > 0) repeat_max -= repeat_min;
1461            }
1462    
1463          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)        /* This code is common to both the zero and non-zero minimum cases. If
1464          the maximum is limited, it replicates the group in a nested fashion,
1465          remembering the bracket starts on a stack. In the case of a zero minimum,
1466          the first one was set up above. In all cases the repeat_max now specifies
1467          the number of additional copies needed. */
1468    
1469          if (repeat_max >= 0)
1470            {
1471            for (i = repeat_max - 1; i >= 0; i--)
1472            {            {
1473            *code++ = OP_BRAZERO + repeat_type;            *code++ = OP_BRAZERO + repeat_type;
1474    
1475              /* All but the final copy start a new nesting, maintaining the
1476              chain of brackets outstanding. */
1477    
1478              if (i != 0)
1479                {
1480                int offset;
1481                *code++ = OP_BRA;
1482                offset = (bralink == NULL)? 0 : code - bralink;
1483                bralink = code;
1484                *code++ = offset >> 8;
1485                *code++ = offset & 255;
1486                }
1487    
1488            memcpy(code, previous, len);            memcpy(code, previous, len);
1489            code += len;            code += len;
1490            }            }
1491    
1492            /* Now chain through the pending brackets, and fill in their length
1493            fields (which are holding the chain links pro tem). */
1494    
1495            while (bralink != NULL)
1496              {
1497              int oldlinkoffset;
1498              int offset = code - bralink + 1;
1499              uschar *bra = code - offset;
1500              oldlinkoffset = (bra[1] << 8) + bra[2];
1501              bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
1502              *code++ = OP_KET;
1503              *code++ = bra[1] = offset >> 8;
1504              *code++ = bra[2] = (offset & 255);
1505              }
1506          }          }
1507    
1508        /* If the maximum is unlimited, set a repeater in the final copy. We        /* If the maximum is unlimited, set a repeater in the final copy. We
# Line 1144  for (;; ptr++) Line 1510  for (;; ptr++)
1510        don't know if there's been an options resetting after the ket. The        don't know if there's been an options resetting after the ket. The
1511        correct offset was computed above. */        correct offset was computed above. */
1512    
1513        if (repeat_max == -1) code[-ketoffset] = OP_KETRMAX + repeat_type;        else code[-ketoffset] = OP_KETRMAX + repeat_type;
1514        }        }
1515    
1516      /* Else there's some kind of shambles */      /* Else there's some kind of shambles */
# Line 1157  for (;; ptr++) Line 1523  for (;; ptr++)
1523    
1524      /* In all cases we no longer have a previous item. */      /* In all cases we no longer have a previous item. */
1525    
1526        END_REPEAT:
1527      previous = NULL;      previous = NULL;
1528      break;      break;
1529    
# Line 1191  for (;; ptr++) Line 1558  for (;; ptr++)
1558    
1559          case '(':          case '(':
1560          bravalue = OP_COND;       /* Conditional group */          bravalue = OP_COND;       /* Conditional group */
1561          if ((pcre_ctypes[*(++ptr)] & ctype_digit) != 0)          if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0)
1562            {            {
1563            condref = *ptr - '0';            condref = *ptr - '0';
1564            while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';            while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';
# Line 1234  for (;; ptr++) Line 1601  for (;; ptr++)
1601          ptr++;          ptr++;
1602          break;          break;
1603    
1604            case 'R':                 /* Pattern recursion */
1605            *code++ = OP_RECURSE;
1606            ptr++;
1607            continue;
1608    
1609          default:                  /* Option setting */          default:                  /* Option setting */
1610          set = unset = 0;          set = unset = 0;
1611          optset = &set;          optset = &set;
# Line 1324  for (;; ptr++) Line 1696  for (;; ptr++)
1696           errorptr,                     /* Where to put an error message */           errorptr,                     /* Where to put an error message */
1697           (bravalue == OP_ASSERTBACK ||           (bravalue == OP_ASSERTBACK ||
1698            bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */            bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
1699           condref))                     /* Condition reference number */           condref,                      /* Condition reference number */
1700             &subreqchar,                  /* For possible last char */
1701             &subcountlits,                /* For literal count */
1702             cd))                          /* Tables block */
1703        goto FAILED;        goto FAILED;
1704    
1705      /* At the end of compiling, code is still pointing to the start of the      /* At the end of compiling, code is still pointing to the start of the
# Line 1337  for (;; ptr++) Line 1712  for (;; ptr++)
1712    
1713      if (bravalue == OP_COND)      if (bravalue == OP_COND)
1714        {        {
       int branchcount = 0;  
1715        uschar *tc = code;        uschar *tc = code;
1716          condcount = 0;
1717    
1718        do {        do {
1719           branchcount++;           condcount++;
1720           tc += (tc[1] << 8) | tc[2];           tc += (tc[1] << 8) | tc[2];
1721           }           }
1722        while (*tc != OP_KET);        while (*tc != OP_KET);
1723    
1724        if (branchcount > 2)        if (condcount > 2)
1725          {          {
1726          *errorptr = ERR27;          *errorptr = ERR27;
1727          goto FAILED;          goto FAILED;
1728          }          }
1729        }        }
1730    
1731        /* Handle updating of the required character. If the subpattern didn't
1732        set one, leave it as it was. Otherwise, update it for normal brackets of
1733        all kinds, forward assertions, and conditions with two branches. Don't
1734        update the literal count for forward assertions, however. If the bracket
1735        is followed by a quantifier with zero repeat, we have to back off. Hence
1736        the definition of prevreqchar and subcountlits outside the main loop so
1737        that they can be accessed for the back off. */
1738    
1739        if (subreqchar > 0 &&
1740             (bravalue >= OP_BRA || bravalue == OP_ONCE || bravalue == OP_ASSERT ||
1741             (bravalue == OP_COND && condcount == 2)))
1742          {
1743          prevreqchar = *reqchar;
1744          *reqchar = subreqchar;
1745          if (bravalue != OP_ASSERT) *countlits += subcountlits;
1746          }
1747    
1748      /* Now update the main code pointer to the end of the group. */      /* Now update the main code pointer to the end of the group. */
1749    
1750      code = tempcode;      code = tempcode;
# Line 1372  for (;; ptr++) Line 1764  for (;; ptr++)
1764    
1765      case '\\':      case '\\':
1766      tempptr = ptr;      tempptr = ptr;
1767      c = check_escape(&ptr, errorptr, *brackets, options, FALSE);      c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1768    
1769      /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values      /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values
1770      are arranged to be the negation of the corresponding OP_values. For the      are arranged to be the negation of the corresponding OP_values. For the
# Line 1417  for (;; ptr++) Line 1809  for (;; ptr++)
1809        {        {
1810        if ((options & PCRE_EXTENDED) != 0)        if ((options & PCRE_EXTENDED) != 0)
1811          {          {
1812          if ((pcre_ctypes[c] & ctype_space) != 0) continue;          if ((cd->ctypes[c] & ctype_space) != 0) continue;
1813          if (c == '#')          if (c == '#')
1814            {            {
1815            while ((c = *(++ptr)) != 0 && c != '\n');            /* The space before the ; is to avoid a warning on a silly compiler
1816              on the Macintosh. */
1817              while ((c = *(++ptr)) != 0 && c != '\n') ;
1818            if (c == 0) break;            if (c == 0) break;
1819            continue;            continue;
1820            }            }
# Line 1433  for (;; ptr++) Line 1827  for (;; ptr++)
1827        if (c == '\\')        if (c == '\\')
1828          {          {
1829          tempptr = ptr;          tempptr = ptr;
1830          c = check_escape(&ptr, errorptr, *brackets, options, FALSE);          c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1831          if (c < 0) { ptr = tempptr; break; }          if (c < 0) { ptr = tempptr; break; }
1832          }          }
1833    
# Line 1445  for (;; ptr++) Line 1839  for (;; ptr++)
1839    
1840      /* This "while" is the end of the "do" above. */      /* This "while" is the end of the "do" above. */
1841    
1842      while (length < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);      while (length < 255 && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0);
1843    
1844        /* Update the last character and the count of literals */
1845    
1846        prevreqchar = (length > 1)? code[-2] : *reqchar;
1847        *reqchar = code[-1];
1848        *countlits += length;
1849    
1850      /* Compute the length and set it in the data vector, and advance to      /* Compute the length and set it in the data vector, and advance to
1851      the next state. */      the next state. */
# Line 1490  Argument: Line 1890  Argument:
1890    errorptr    -> pointer to error message    errorptr    -> pointer to error message
1891    lookbehind  TRUE if this is a lookbehind assertion    lookbehind  TRUE if this is a lookbehind assertion
1892    condref     > 0 for OPT_CREF setting at start of conditional group    condref     > 0 for OPT_CREF setting at start of conditional group
1893      reqchar     -> place to put the last required character, or a negative number
1894      countlits   -> place to put the shortest literal count of any branch
1895      cd          points to the data block with tables pointers
1896    
1897  Returns:      TRUE on success  Returns:      TRUE on success
1898  */  */
1899    
1900  static BOOL  static BOOL
1901  compile_regex(int options, int optchanged, int *brackets, uschar **codeptr,  compile_regex(int options, int optchanged, int *brackets, uschar **codeptr,
1902    const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref)    const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref,
1903      int *reqchar, int *countlits, compile_data *cd)
1904  {  {
1905  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
1906  uschar *code = *codeptr;  uschar *code = *codeptr;
# Line 1504  uschar *last_branch = code; Line 1908  uschar *last_branch = code;
1908  uschar *start_bracket = code;  uschar *start_bracket = code;
1909  uschar *reverse_count = NULL;  uschar *reverse_count = NULL;
1910  int oldoptions = options & PCRE_IMS;  int oldoptions = options & PCRE_IMS;
1911    int branchreqchar, branchcountlits;
1912    
1913    *reqchar = -1;
1914    *countlits = INT_MAX;
1915  code += 3;  code += 3;
1916    
1917  /* At the start of a reference-based conditional group, insert the reference  /* At the start of a reference-based conditional group, insert the reference
# Line 1543  for (;;) Line 1950  for (;;)
1950    
1951    /* Now compile the branch */    /* Now compile the branch */
1952    
1953    if (!compile_branch(options, brackets, &code, &ptr, errorptr, &optchanged))    if (!compile_branch(options, brackets, &code, &ptr, errorptr, &optchanged,
1954          &branchreqchar, &branchcountlits, cd))
1955      {      {
1956      *ptrptr = ptr;      *ptrptr = ptr;
1957      return FALSE;      return FALSE;
# Line 1555  for (;;) Line 1963  for (;;)
1963    last_branch[1] = length >> 8;    last_branch[1] = length >> 8;
1964    last_branch[2] = length & 255;    last_branch[2] = length & 255;
1965    
1966      /* Save the last required character if all branches have the same; a current
1967      value of -1 means unset, while -2 means "previous branch had no last required
1968      char".  */
1969    
1970      if (*reqchar != -2)
1971        {
1972        if (branchreqchar >= 0)
1973          {
1974          if (*reqchar == -1) *reqchar = branchreqchar;
1975          else if (*reqchar != branchreqchar) *reqchar = -2;
1976          }
1977        else *reqchar = -2;
1978        }
1979    
1980      /* Keep the shortest literal count */
1981    
1982      if (branchcountlits < *countlits) *countlits = branchcountlits;
1983      DPRINTF(("literal count = %d min=%d\n", branchcountlits, *countlits));
1984    
1985    /* If lookbehind, check that this branch matches a fixed-length string,    /* If lookbehind, check that this branch matches a fixed-length string,
1986    and put the length into the OP_REVERSE item. Temporarily mark the end of    and put the length into the OP_REVERSE item. Temporarily mark the end of
1987    the branch with OP_END. */    the branch with OP_END. */
# Line 1649  for (;;) Line 2076  for (;;)
2076      code += 2;      code += 2;
2077      break;      break;
2078    
2079        case OP_WORD_BOUNDARY:
2080        case OP_NOT_WORD_BOUNDARY:
2081        code++;
2082        break;
2083    
2084      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
2085      case OP_ASSERTBACK:      case OP_ASSERTBACK:
2086      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
# Line 1676  all of whose alternatives start with OP_ Line 2108  all of whose alternatives start with OP_
2108  it's anchored. However, if this is a multiline pattern, then only OP_SOD  it's anchored. However, if this is a multiline pattern, then only OP_SOD
2109  counts, since OP_CIRC can match in the middle.  counts, since OP_CIRC can match in the middle.
2110    
2111  A branch is also implicitly anchored if it starts with .* because that will try  A branch is also implicitly anchored if it starts with .* and DOTALL is set,
2112  the rest of the pattern at all possible matching points, so there is no point  because that will try the rest of the pattern at all possible matching points,
2113  trying them again.  so there is no point trying them again.
2114    
2115  Arguments:  Arguments:
2116    code       points to start of expression (the bracket)    code       points to start of expression (the bracket)
# Line 1696  do { Line 2128  do {
2128     register int op = *scode;     register int op = *scode;
2129     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
2130       { if (!is_anchored(scode, options)) return FALSE; }       { if (!is_anchored(scode, options)) return FALSE; }
2131     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)     else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) &&
2132                (*options & PCRE_DOTALL) != 0)
2133       { if (scode[1] != OP_ANY) return FALSE; }       { if (scode[1] != OP_ANY) return FALSE; }
2134     else if (op != OP_SOD &&     else if (op != OP_SOD &&
2135             ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))             ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))
# Line 1710  return TRUE; Line 2143  return TRUE;
2143    
2144    
2145  /*************************************************  /*************************************************
2146  *     Check for start with \n line expression    *  *         Check for starting with ^ or .*        *
2147  *************************************************/  *************************************************/
2148    
2149  /* This is called for multiline expressions to try to find out if every branch  /* This is called to find out if every branch starts with ^ or .* so that
2150  starts with ^ so that "first char" processing can be done to speed things up.  "first char" processing can be done to speed things up in multiline
2151    matching and for non-DOTALL patterns that start with .* (which must start at
2152    the beginning or after \n).
2153    
2154  Argument:  points to start of expression (the bracket)  Argument:  points to start of expression (the bracket)
2155  Returns:   TRUE or FALSE  Returns:   TRUE or FALSE
# Line 1728  do { Line 2163  do {
2163     register int op = *scode;     register int op = *scode;
2164     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
2165       { if (!is_startline(scode)) return FALSE; }       { if (!is_startline(scode)) return FALSE; }
2166       else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)
2167         { if (scode[1] != OP_ANY) return FALSE; }
2168     else if (op != OP_CIRC) return FALSE;     else if (op != OP_CIRC) return FALSE;
2169     code += (code[1] << 8) + code[2];     code += (code[1] << 8) + code[2];
2170     }     }
# Line 1813  Arguments: Line 2250  Arguments:
2250    options      various option bits    options      various option bits
2251    errorptr     pointer to pointer to error text    errorptr     pointer to pointer to error text
2252    erroroffset  ptr offset in pattern where error was detected    erroroffset  ptr offset in pattern where error was detected
2253      tables       pointer to character tables or NULL
2254    
2255  Returns:       pointer to compiled data block, or NULL on error,  Returns:       pointer to compiled data block, or NULL on error,
2256                 with errorptr and erroroffset set                 with errorptr and erroroffset set
# Line 1820  Returns:       pointer to compiled data Line 2258  Returns:       pointer to compiled data
2258    
2259  pcre *  pcre *
2260  pcre_compile(const char *pattern, int options, const char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
2261    int *erroroffset)    int *erroroffset, const unsigned char *tables)
2262  {  {
2263  real_pcre *re;  real_pcre *re;
2264  int length = 3;      /* For initial BRA plus length */  int length = 3;      /* For initial BRA plus length */
2265  int runlength;  int runlength;
2266  int c, size;  int c, reqchar, countlits;
2267  int bracount = 0;  int bracount = 0;
2268  int top_backref = 0;  int top_backref = 0;
2269  int branch_extra = 0;  int branch_extra = 0;
2270  int branch_newextra;  int branch_newextra;
2271  unsigned int brastackptr = 0;  unsigned int brastackptr = 0;
2272    size_t size;
2273  uschar *code;  uschar *code;
2274  const uschar *ptr;  const uschar *ptr;
2275    compile_data compile_block;
2276  int brastack[BRASTACK_SIZE];  int brastack[BRASTACK_SIZE];
2277  uschar bralenstack[BRASTACK_SIZE];  uschar bralenstack[BRASTACK_SIZE];
2278    
# Line 1861  if ((options & ~PUBLIC_OPTIONS) != 0) Line 2301  if ((options & ~PUBLIC_OPTIONS) != 0)
2301    return NULL;    return NULL;
2302    }    }
2303    
2304    /* Set up pointers to the individual character tables */
2305    
2306    if (tables == NULL) tables = pcre_default_tables;
2307    compile_block.lcc = tables + lcc_offset;
2308    compile_block.fcc = tables + fcc_offset;
2309    compile_block.cbits = tables + cbits_offset;
2310    compile_block.ctypes = tables + ctypes_offset;
2311    
2312    /* Reflect pattern for debugging output */
2313    
2314  DPRINTF(("------------------------------------------------------------------\n"));  DPRINTF(("------------------------------------------------------------------\n"));
2315  DPRINTF(("%s\n", pattern));  DPRINTF(("%s\n", pattern));
2316    
# Line 1879  while ((c = *(++ptr)) != 0) Line 2329  while ((c = *(++ptr)) != 0)
2329    
2330    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
2331      {      {
2332      if ((pcre_ctypes[c] & ctype_space) != 0) continue;      if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2333      if (c == '#')      if (c == '#')
2334        {        {
2335        while ((c = *(++ptr)) != 0 && c != '\n');        /* The space before the ; is to avoid a warning on a silly compiler
2336          on the Macintosh. */
2337          while ((c = *(++ptr)) != 0 && c != '\n') ;
2338        continue;        continue;
2339        }        }
2340      }      }
# Line 1897  while ((c = *(++ptr)) != 0) Line 2349  while ((c = *(++ptr)) != 0)
2349      case '\\':      case '\\':
2350        {        {
2351        const uschar *save_ptr = ptr;        const uschar *save_ptr = ptr;
2352        c = check_escape(&ptr, errorptr, bracount, options, FALSE);        c = check_escape(&ptr, errorptr, bracount, options, FALSE, &compile_block);
2353        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2354        if (c >= 0)        if (c >= 0)
2355          {          {
# Line 1917  while ((c = *(++ptr)) != 0) Line 2369  while ((c = *(++ptr)) != 0)
2369        int refnum = -c - ESC_REF;        int refnum = -c - ESC_REF;
2370        if (refnum > top_backref) top_backref = refnum;        if (refnum > top_backref) top_backref = refnum;
2371        length++;   /* For single back reference */        length++;   /* For single back reference */
2372        if (ptr[1] == '{' && is_counted_repeat(ptr+2))        if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2373          {          {
2374          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
2375          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2376          if ((min == 0 && (max == 1 || max == -1)) ||          if ((min == 0 && (max == 1 || max == -1)) ||
2377            (min == 1 && max == -1))            (min == 1 && max == -1))
# Line 1943  while ((c = *(++ptr)) != 0) Line 2395  while ((c = *(++ptr)) != 0)
2395      or back reference. */      or back reference. */
2396    
2397      case '{':      case '{':
2398      if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;      if (!is_counted_repeat(ptr+1, &compile_block)) goto NORMAL_CHAR;
2399      ptr = read_repeat_counts(ptr+1, &min, &max, errorptr);      ptr = read_repeat_counts(ptr+1, &min, &max, errorptr, &compile_block);
2400      if (*errorptr != NULL) goto PCRE_ERROR_RETURN;      if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2401      if ((min == 0 && (max == 1 || max == -1)) ||      if ((min == 0 && (max == 1 || max == -1)) ||
2402        (min == 1 && max == -1))        (min == 1 && max == -1))
# Line 1979  while ((c = *(++ptr)) != 0) Line 2431  while ((c = *(++ptr)) != 0)
2431        {        {
2432        if (*ptr == '\\')        if (*ptr == '\\')
2433          {          {
2434          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE);          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE,
2435              &compile_block);
2436          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2437          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
2438          }          }
# Line 1996  while ((c = *(++ptr)) != 0) Line 2449  while ((c = *(++ptr)) != 0)
2449    
2450        /* A repeat needs either 1 or 5 bytes. */        /* A repeat needs either 1 or 5 bytes. */
2451    
2452        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2453          {          {
2454          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
2455          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2456          if ((min == 0 && (max == 1 || max == -1)) ||          if ((min == 0 && (max == 1 || max == -1)) ||
2457            (min == 1 && max == -1))            (min == 1 && max == -1))
# Line 2046  while ((c = *(++ptr)) != 0) Line 2499  while ((c = *(++ptr)) != 0)
2499          ptr += 2;          ptr += 2;
2500          break;          break;
2501    
2502            /* A recursive call to the regex is an extension, to provide the
2503            facility which can be obtained by $(?p{perl-code}) in Perl 5.6. */
2504    
2505            case 'R':
2506            if (ptr[3] != ')')
2507              {
2508              *errorptr = ERR29;
2509              goto PCRE_ERROR_RETURN;
2510              }
2511            ptr += 3;
2512            length += 1;
2513            break;
2514    
2515          /* Lookbehinds are in Perl from version 5.005 */          /* Lookbehinds are in Perl from version 5.005 */
2516    
2517          case '<':          case '<':
# Line 2064  while ((c = *(++ptr)) != 0) Line 2530  while ((c = *(++ptr)) != 0)
2530          group. */          group. */
2531    
2532          case '(':          case '(':
2533          if ((pcre_ctypes[ptr[3]] & ctype_digit) != 0)          if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0)
2534            {            {
2535            ptr += 4;            ptr += 4;
2536            length += 2;            length += 2;
2537            while ((pcre_ctypes[*ptr] & ctype_digit) != 0) ptr++;            while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++;
2538            if (*ptr != ')')            if (*ptr != ')')
2539              {              {
2540              *errorptr = ERR26;              *errorptr = ERR26;
# Line 2078  while ((c = *(++ptr)) != 0) Line 2544  while ((c = *(++ptr)) != 0)
2544          else   /* An assertion must follow */          else   /* An assertion must follow */
2545            {            {
2546            ptr++;   /* Can treat like ':' as far as spacing is concerned */            ptr++;   /* Can treat like ':' as far as spacing is concerned */
2547              if (ptr[2] != '?' ||
2548            if (ptr[2] != '?' || strchr("=!<", ptr[3]) == NULL)               (ptr[3] != '=' && ptr[3] != '!' && ptr[3] != '<') )
2549              {              {
2550              ptr += 2;    /* To get right offset in message */              ptr += 2;    /* To get right offset in message */
2551              *errorptr = ERR28;              *errorptr = ERR28;
# Line 2153  while ((c = *(++ptr)) != 0) Line 2619  while ((c = *(++ptr)) != 0)
2619              will lead to an over-estimate on the length, but this shouldn't              will lead to an over-estimate on the length, but this shouldn't
2620              matter very much. We also have to allow for resetting options at              matter very much. We also have to allow for resetting options at
2621              the start of any alternations, which we do by setting              the start of any alternations, which we do by setting
2622              branch_newextra to 2. */              branch_newextra to 2. Finally, we record whether the case-dependent
2623                flag ever changes within the regex. This is used by the "required
2624                character" code. */
2625    
2626              case ':':              case ':':
2627              if (((set|unset) & PCRE_IMS) != 0)              if (((set|unset) & PCRE_IMS) != 0)
2628                {                {
2629                length += 4;                length += 4;
2630                branch_newextra = 2;                branch_newextra = 2;
2631                  if (((set|unset) & PCRE_CASELESS) != 0) options |= PCRE_ICHANGED;
2632                }                }
2633              goto END_OPTIONS;              goto END_OPTIONS;
2634    
# Line 2237  while ((c = *(++ptr)) != 0) Line 2706  while ((c = *(++ptr)) != 0)
2706        /* Leave ptr at the final char; for read_repeat_counts this happens        /* Leave ptr at the final char; for read_repeat_counts this happens
2707        automatically; for the others we need an increment. */        automatically; for the others we need an increment. */
2708    
2709        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2, &compile_block))
2710          {          {
2711          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr);          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr,
2712              &compile_block);
2713          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2714          }          }
2715        else if (c == '*') { minval = 0; maxval = -1; ptr++; }        else if (c == '*') { minval = 0; maxval = -1; ptr++; }
2716        else if (c == '+') { maxval = -1; ptr++; }        else if (c == '+') { maxval = -1; ptr++; }
2717        else if (c == '?') { minval = 0; ptr++; }        else if (c == '?') { minval = 0; ptr++; }
2718    
2719        /* If there is a minimum > 1 we have to replicate up to minval-1 times;        /* If the minimum is zero, we have to allow for an OP_BRAZERO before the
2720        if there is a limited maximum we have to replicate up to maxval-1 times        group, and if the maximum is greater than zero, we have to replicate
2721        and allow for a BRAZERO item before each optional copy, as we also have        maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting
2722        to do before the first copy if the minimum is zero. */        bracket set - hence the 7. */
2723    
2724        if (minval == 0) length++;        if (minval == 0)
2725          else if (minval > 1) length += (minval - 1) * duplength;          {
2726        if (maxval > minval) length += (maxval - minval) * (duplength + 1);          length++;
2727            if (maxval > 0) length += (maxval - 1) * (duplength + 7);
2728            }
2729    
2730          /* When the minimum is greater than zero, we have to replicate up to
2731          minval-1 times, with no additions required in the copies. Then, if
2732          there is a limited maximum we have to replicate up to maxval-1 times
2733          allowing for a BRAZERO item before each optional copy and nesting
2734          brackets for all but one of the optional copies. */
2735    
2736          else
2737            {
2738            length += (minval - 1) * duplength;
2739            if (maxval > minval)   /* Need this test as maxval=-1 means no limit */
2740              length += (maxval - minval) * (duplength + 7) - 6;
2741            }
2742        }        }
2743      continue;      continue;
2744    
# Line 2270  while ((c = *(++ptr)) != 0) Line 2755  while ((c = *(++ptr)) != 0)
2755        {        {
2756        if ((options & PCRE_EXTENDED) != 0)        if ((options & PCRE_EXTENDED) != 0)
2757          {          {
2758          if ((pcre_ctypes[c] & ctype_space) != 0) continue;          if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2759          if (c == '#')          if (c == '#')
2760            {            {
2761            while ((c = *(++ptr)) != 0 && c != '\n');            /* The space before the ; is to avoid a warning on a silly compiler
2762              on the Macintosh. */
2763              while ((c = *(++ptr)) != 0 && c != '\n') ;
2764            continue;            continue;
2765            }            }
2766          }          }
# Line 2284  while ((c = *(++ptr)) != 0) Line 2771  while ((c = *(++ptr)) != 0)
2771        if (c == '\\')        if (c == '\\')
2772          {          {
2773          const uschar *saveptr = ptr;          const uschar *saveptr = ptr;
2774          c = check_escape(&ptr, errorptr, bracount, options, FALSE);          c = check_escape(&ptr, errorptr, bracount, options, FALSE,
2775              &compile_block);
2776          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2777          if (c < 0) { ptr = saveptr; break; }          if (c < 0) { ptr = saveptr; break; }
2778          }          }
# Line 2296  while ((c = *(++ptr)) != 0) Line 2784  while ((c = *(++ptr)) != 0)
2784    
2785      /* This "while" is the end of the "do" above. */      /* This "while" is the end of the "do" above. */
2786    
2787      while (runlength < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);      while (runlength < 255 &&
2788          (compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0);
2789    
2790      ptr--;      ptr--;
2791      length += runlength;      length += runlength;
# Line 2327  if (re == NULL) Line 2816  if (re == NULL)
2816    return NULL;    return NULL;
2817    }    }
2818    
2819  /* Put in the magic number and the options. */  /* Put in the magic number, and save the size, options, and table pointer */
2820    
2821  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
2822    re->size = size;
2823  re->options = options;  re->options = options;
2824    re->tables = tables;
2825    
2826  /* Set up a starting, non-extracting bracket, then compile the expression. On  /* Set up a starting, non-extracting bracket, then compile the expression. On
2827  error, *errorptr will be set non-NULL, so we don't need to look at the result  error, *errorptr will be set non-NULL, so we don't need to look at the result
# Line 2340  ptr = (const uschar *)pattern; Line 2831  ptr = (const uschar *)pattern;
2831  code = re->code;  code = re->code;
2832  *code = OP_BRA;  *code = OP_BRA;
2833  bracount = 0;  bracount = 0;
2834  (void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1);  (void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1,
2835      &reqchar, &countlits, &compile_block);
2836  re->top_bracket = bracount;  re->top_bracket = bracount;
2837  re->top_backref = top_backref;  re->top_backref = top_backref;
2838    
# Line 2372  if (*errorptr != NULL) Line 2864  if (*errorptr != NULL)
2864    return NULL;    return NULL;
2865    }    }
2866    
2867  /* If the anchored option was not passed, set flag if we can determine that it  /* If the anchored option was not passed, set flag if we can determine that the
2868  is anchored by virtue of ^ characters or \A or anything else. Otherwise, see if  pattern is anchored by virtue of ^ characters or \A or anything else (such as
2869  we can determine what the first character has to be, because that speeds up  starting with .* when DOTALL is set).
2870  unanchored matches no end. In the case of multiline matches, an alternative is  
2871  to set the PCRE_STARTLINE flag if all branches start with ^. */  Otherwise, see if we can determine what the first character has to be, because
2872    that speeds up unanchored matches no end. If not, see if we can set the
2873    PCRE_STARTLINE flag. This is helpful for multiline matches when all branches
2874    start with ^, and also when all branches start with .* for non-DOTALL matches.
2875    */
2876    
2877  if ((options & PCRE_ANCHORED) == 0)  if ((options & PCRE_ANCHORED) == 0)
2878    {    {
# Line 2396  if ((options & PCRE_ANCHORED) == 0) Line 2892  if ((options & PCRE_ANCHORED) == 0)
2892      }      }
2893    }    }
2894    
2895    /* Save the last required character if there are at least two literal
2896    characters on all paths, or if there is no first character setting. */
2897    
2898    if (reqchar >= 0 && (countlits > 1 || (re->options & PCRE_FIRSTSET) == 0))
2899      {
2900      re->req_char = reqchar;
2901      re->options |= PCRE_REQCHSET;
2902      }
2903    
2904  /* Print out the compiled data for debugging */  /* Print out the compiled data for debugging */
2905    
2906  #ifdef DEBUG  #ifdef DEBUG
# Line 2405  printf("Length = %d top_bracket = %d top Line 2910  printf("Length = %d top_bracket = %d top
2910    
2911  if (re->options != 0)  if (re->options != 0)
2912    {    {
2913    printf("%s%s%s%s%s%s%s%s\n",    printf("%s%s%s%s%s%s%s%s%s\n",
2914      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
2915      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
2916        ((re->options & PCRE_ICHANGED) != 0)? "case state changed " : "",
2917      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
2918      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
2919      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
# Line 2422  if ((re->options & PCRE_FIRSTSET) != 0) Line 2928  if ((re->options & PCRE_FIRSTSET) != 0)
2928      else printf("First char = \\x%02x\n", re->first_char);      else printf("First char = \\x%02x\n", re->first_char);
2929    }    }
2930    
2931    if ((re->options & PCRE_REQCHSET) != 0)
2932      {
2933      if (isprint(re->req_char)) printf("Req char = %c\n", re->req_char);
2934        else printf("Req char = \\x%02x\n", re->req_char);
2935      }
2936    
2937  code_end = code;  code_end = code;
2938  code_base = code = re->code;  code_base = code = re->code;
2939    
# Line 2637  return (pcre *)re; Line 3149  return (pcre *)re;
3149    
3150    
3151  /*************************************************  /*************************************************
 *        Match a character type                  *  
 *************************************************/  
   
 /* Not used in all the places it might be as it's sometimes faster  
 to put the code inline.  
   
 Arguments:  
   type        the character type  
   c           the character  
   dotall      the dotall flag  
   
 Returns:      TRUE if character is of the type  
 */  
   
 static BOOL  
 match_type(int type, int c, BOOL dotall)  
 {  
   
 #ifdef DEBUG  
 if (isprint(c)) printf("matching subject %c against ", c);  
   else printf("matching subject \\x%02x against ", c);  
 printf("%s\n", OP_names[type]);  
 #endif  
   
 switch(type)  
   {  
   case OP_ANY:            return dotall || c != '\n';  
   case OP_NOT_DIGIT:      return (pcre_ctypes[c] & ctype_digit) == 0;  
   case OP_DIGIT:          return (pcre_ctypes[c] & ctype_digit) != 0;  
   case OP_NOT_WHITESPACE: return (pcre_ctypes[c] & ctype_space) == 0;  
   case OP_WHITESPACE:     return (pcre_ctypes[c] & ctype_space) != 0;  
   case OP_NOT_WORDCHAR:   return (pcre_ctypes[c] & ctype_word) == 0;  
   case OP_WORDCHAR:       return (pcre_ctypes[c] & ctype_word) != 0;  
   }  
 return FALSE;  
 }  
   
   
   
 /*************************************************  
3152  *          Match a back-reference                *  *          Match a back-reference                *
3153  *************************************************/  *************************************************/
3154    
# Line 2695  Returns:      TRUE if matched Line 3167  Returns:      TRUE if matched
3167    
3168  static BOOL  static BOOL
3169  match_ref(int offset, register const uschar *eptr, int length, match_data *md,  match_ref(int offset, register const uschar *eptr, int length, match_data *md,
3170    int ims)    unsigned long int ims)
3171  {  {
3172  const uschar *p = md->start_subject + md->offset_vector[offset];  const uschar *p = md->start_subject + md->offset_vector[offset];
3173    
# Line 2719  if (length > md->end_subject - eptr) ret Line 3191  if (length > md->end_subject - eptr) ret
3191  /* Separate the caseless case for speed */  /* Separate the caseless case for speed */
3192    
3193  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
3194    { while (length-- > 0) if (pcre_lcc[*p++] != pcre_lcc[*eptr++]) return FALSE; }    {
3195      while (length-- > 0)
3196        if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
3197      }
3198  else  else
3199    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
3200    
# Line 2743  Arguments: Line 3218  Arguments:
3218     offset_top  current top pointer     offset_top  current top pointer
3219     md          pointer to "static" info for the match     md          pointer to "static" info for the match
3220     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
3221     condassert  TRUE if called to check a condition assertion     eptrb       pointer to chain of blocks containing eptr at start of
3222     eptrb       eptr at start of last bracket                   brackets - for testing for empty matches
3223       flags       can contain
3224                     match_condassert - this is an assertion condition
3225                     match_isgroup - this is the start of a bracketed group
3226    
3227  Returns:       TRUE if matched  Returns:       TRUE if matched
3228  */  */
3229    
3230  static BOOL  static BOOL
3231  match(register const uschar *eptr, register const uschar *ecode,  match(register const uschar *eptr, register const uschar *ecode,
3232    int offset_top, match_data *md, int ims, BOOL condassert, const uschar *eptrb)    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
3233      int flags)
3234  {  {
3235  int original_ims = ims;   /* Save for resetting on ')' */  unsigned long int original_ims = ims;   /* Save for resetting on ')' */
3236    eptrblock newptrb;
3237    
3238    /* At the start of a bracketed group, add the current subject pointer to the
3239    stack of such pointers, to be re-instated at the end of the group when we hit
3240    the closing ket. When match() is called in other circumstances, we don't add to
3241    the stack. */
3242    
3243    if ((flags & match_isgroup) != 0)
3244      {
3245      newptrb.prev = eptrb;
3246      newptrb.saved_eptr = eptr;
3247      eptrb = &newptrb;
3248      }
3249    
3250    /* Now start processing the operations. */
3251    
3252  for (;;)  for (;;)
3253    {    {
# Line 2782  for (;;) Line 3276  for (;;)
3276      int number = op - OP_BRA;      int number = op - OP_BRA;
3277      int offset = number << 1;      int offset = number << 1;
3278    
3279      DPRINTF(("start bracket %d\n", number));  #ifdef DEBUG
3280        printf("start bracket %d subject=", number);
3281        pchars(eptr, 16, TRUE, md);
3282        printf("\n");
3283    #endif
3284    
3285      if (offset < md->offset_max)      if (offset < md->offset_max)
3286        {        {
# Line 2795  for (;;) Line 3293  for (;;)
3293    
3294        do        do
3295          {          {
3296          if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;          if (match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup))
3297              return TRUE;
3298          ecode += (ecode[1] << 8) + ecode[2];          ecode += (ecode[1] << 8) + ecode[2];
3299          }          }
3300        while (*ecode == OP_ALT);        while (*ecode == OP_ALT);
# Line 2821  for (;;) Line 3320  for (;;)
3320      DPRINTF(("start bracket 0\n"));      DPRINTF(("start bracket 0\n"));
3321      do      do
3322        {        {
3323        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;        if (match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup))
3324            return TRUE;
3325        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
3326        }        }
3327      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 2840  for (;;) Line 3340  for (;;)
3340        return match(eptr,        return match(eptr,
3341          ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)?          ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)?
3342            5 : 3 + (ecode[1] << 8) + ecode[2]),            5 : 3 + (ecode[1] << 8) + ecode[2]),
3343          offset_top, md, ims, FALSE, eptr);          offset_top, md, ims, eptrb, match_isgroup);
3344        }        }
3345    
3346      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
# Line 2848  for (;;) Line 3348  for (;;)
3348    
3349      else      else
3350        {        {
3351        if (match(eptr, ecode+3, offset_top, md, ims, TRUE, NULL))        if (match(eptr, ecode+3, offset_top, md, ims, NULL,
3352              match_condassert | match_isgroup))
3353          {          {
3354          ecode += 3 + (ecode[4] << 8) + ecode[5];          ecode += 3 + (ecode[4] << 8) + ecode[5];
3355          while (*ecode == OP_ALT) ecode += (ecode[1] << 8) + ecode[2];          while (*ecode == OP_ALT) ecode += (ecode[1] << 8) + ecode[2];
3356          }          }
3357        else ecode += (ecode[1] << 8) + ecode[2];        else ecode += (ecode[1] << 8) + ecode[2];
3358        return match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr);        return match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup);
3359        }        }
3360      /* Control never reaches here */      /* Control never reaches here */
3361    
# Line 2864  for (;;) Line 3365  for (;;)
3365      ecode += 2;      ecode += 2;
3366      break;      break;
3367    
3368      /* End of the pattern */      /* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched
3369        an empty string - recursion will then try other alternatives, if any. */
3370    
3371      case OP_END:      case OP_END:
3372        if (md->notempty && eptr == md->start_match) return FALSE;
3373      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;          /* Record where we ended */
3374      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;   /* and how many extracts were taken */
3375      return TRUE;      return TRUE;
# Line 2876  for (;;) Line 3379  for (;;)
3379      case OP_OPT:      case OP_OPT:
3380      ims = ecode[1];      ims = ecode[1];
3381      ecode += 2;      ecode += 2;
3382      DPRINTF(("ims set to %02x\n", ims));      DPRINTF(("ims set to %02lx\n", ims));
3383      break;      break;
3384    
3385      /* Assertion brackets. Check the alternative branches in turn - the      /* Assertion brackets. Check the alternative branches in turn - the
# Line 2889  for (;;) Line 3392  for (;;)
3392      case OP_ASSERTBACK:      case OP_ASSERTBACK:
3393      do      do
3394        {        {
3395        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, NULL)) break;        if (match(eptr, ecode+3, offset_top, md, ims, NULL, match_isgroup)) break;
3396        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
3397        }        }
3398      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 2897  for (;;) Line 3400  for (;;)
3400    
3401      /* If checking an assertion for a condition, return TRUE. */      /* If checking an assertion for a condition, return TRUE. */
3402    
3403      if (condassert) return TRUE;      if ((flags & match_condassert) != 0) return TRUE;
3404    
3405      /* Continue from after the assertion, updating the offsets high water      /* Continue from after the assertion, updating the offsets high water
3406      mark, since extracts may have been taken during the assertion. */      mark, since extracts may have been taken during the assertion. */
# Line 2913  for (;;) Line 3416  for (;;)
3416      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
3417      do      do
3418        {        {
3419        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, NULL)) return FALSE;        if (match(eptr, ecode+3, offset_top, md, ims, NULL, match_isgroup))
3420            return FALSE;
3421        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
3422        }        }
3423      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
3424    
3425      if (condassert) return TRUE;      if ((flags & match_condassert) != 0) return TRUE;
3426    
3427      ecode += 3;      ecode += 3;
3428      continue;      continue;
3429    
# Line 2932  for (;;) Line 3437  for (;;)
3437      ecode += 3;      ecode += 3;
3438      break;      break;
3439    
3440        /* Recursion matches the current regex, nested. If there are any capturing
3441        brackets started but not finished, we have to save their starting points
3442        and reinstate them after the recursion. However, we don't know how many
3443        such there are (offset_top records the completed total) so we just have
3444        to save all the potential data. There may be up to 99 such values, which
3445        is a bit large to put on the stack, but using malloc for small numbers
3446        seems expensive. As a compromise, the stack is used when there are fewer
3447        than 16 values to store; otherwise malloc is used. A problem is what to do
3448        if the malloc fails ... there is no way of returning to the top level with
3449        an error. Save the top 15 values on the stack, and accept that the rest
3450        may be wrong. */
3451    
3452        case OP_RECURSE:
3453          {
3454          BOOL rc;
3455          int *save;
3456          int stacksave[15];
3457    
3458          c = md->offset_max;
3459    
3460          if (c < 16) save = stacksave; else
3461            {
3462            save = (int *)(pcre_malloc)((c+1) * sizeof(int));
3463            if (save == NULL)
3464              {
3465              save = stacksave;
3466              c = 15;
3467              }
3468            }
3469    
3470          for (i = 1; i <= c; i++)
3471            save[i] = md->offset_vector[md->offset_end - i];
3472          rc = match(eptr, md->start_pattern, offset_top, md, ims, eptrb,
3473            match_isgroup);
3474          for (i = 1; i <= c; i++)
3475            md->offset_vector[md->offset_end - i] = save[i];
3476          if (save != stacksave) (pcre_free)(save);
3477          if (!rc) return FALSE;
3478    
3479          /* In case the recursion has set more capturing values, save the final
3480          number, then move along the subject till after the recursive match,
3481          and advance one byte in the pattern code. */
3482    
3483          offset_top = md->end_offset_top;
3484          eptr = md->end_match_ptr;
3485          ecode++;
3486          }
3487        break;
3488    
3489      /* "Once" brackets are like assertion brackets except that after a match,      /* "Once" brackets are like assertion brackets except that after a match,
3490      the point in the subject string is not moved back. Thus there can never be      the point in the subject string is not moved back. Thus there can never be
# Line 2943  for (;;) Line 3496  for (;;)
3496      case OP_ONCE:      case OP_ONCE:
3497        {        {
3498        const uschar *prev = ecode;        const uschar *prev = ecode;
3499          const uschar *saved_eptr = eptr;
3500    
3501        do        do
3502          {          {
3503          if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) break;          if (match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup))
3504              break;
3505          ecode += (ecode[1] << 8) + ecode[2];          ecode += (ecode[1] << 8) + ecode[2];
3506          }          }
3507        while (*ecode == OP_ALT);        while (*ecode == OP_ALT);
# Line 2969  for (;;) Line 3524  for (;;)
3524        5.005. If there is an options reset, it will get obeyed in the normal        5.005. If there is an options reset, it will get obeyed in the normal
3525        course of events. */        course of events. */
3526    
3527        if (*ecode == OP_KET || eptr == eptrb)        if (*ecode == OP_KET || eptr == saved_eptr)
3528          {          {
3529          ecode += 3;          ecode += 3;
3530          break;          break;
# Line 2983  for (;;) Line 3538  for (;;)
3538        if (ecode[3] == OP_OPT)        if (ecode[3] == OP_OPT)
3539          {          {
3540          ims = (ims & ~PCRE_IMS) | ecode[4];          ims = (ims & ~PCRE_IMS) | ecode[4];
3541          DPRINTF(("ims set to %02x at group repeat\n", ims));          DPRINTF(("ims set to %02lx at group repeat\n", ims));
3542          }          }
3543    
3544        if (*ecode == OP_KETRMIN)        if (*ecode == OP_KETRMIN)
3545          {          {
3546          if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr) ||          if (match(eptr, ecode+3, offset_top, md, ims, eptrb, 0) ||
3547              match(eptr, prev, offset_top, md, ims, FALSE, eptr)) return TRUE;              match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup))
3548                  return TRUE;
3549          }          }
3550        else  /* OP_KETRMAX */        else  /* OP_KETRMAX */
3551          {          {
3552          if (match(eptr, prev, offset_top, md, ims, FALSE, eptr) ||          if (match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup) ||
3553              match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;              match(eptr, ecode+3, offset_top, md, ims, eptrb, 0)) return TRUE;
3554          }          }
3555        }        }
3556      return FALSE;      return FALSE;
# Line 3015  for (;;) Line 3571  for (;;)
3571      case OP_BRAZERO:      case OP_BRAZERO:
3572        {        {
3573        const uschar *next = ecode+1;        const uschar *next = ecode+1;
3574        if (match(eptr, next, offset_top, md, ims, FALSE, eptr)) return TRUE;        if (match(eptr, next, offset_top, md, ims, eptrb, match_isgroup))
3575            return TRUE;
3576        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3577        ecode = next + 3;        ecode = next + 3;
3578        }        }
# Line 3025  for (;;) Line 3582  for (;;)
3582        {        {
3583        const uschar *next = ecode+1;        const uschar *next = ecode+1;
3584        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3585        if (match(eptr, next+3, offset_top, md, ims, FALSE, eptr)) return TRUE;        if (match(eptr, next+3, offset_top, md, ims, eptrb, match_isgroup))
3586            return TRUE;
3587        ecode++;        ecode++;
3588        }        }
3589      break;      break;
# Line 3040  for (;;) Line 3598  for (;;)
3598      case OP_KETRMAX:      case OP_KETRMAX:
3599        {        {
3600        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
3601          const uschar *saved_eptr = eptrb->saved_eptr;
3602    
3603          eptrb = eptrb->prev;    /* Back up the stack of bracket start pointers */
3604    
3605        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
3606            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
# Line 3059  for (;;) Line 3620  for (;;)
3620          int number = *prev - OP_BRA;          int number = *prev - OP_BRA;
3621          int offset = number << 1;          int offset = number << 1;
3622    
3623          DPRINTF(("end bracket %d\n", number));  #ifdef DEBUG
3624            printf("end bracket %d", number);
3625            printf("\n");
3626    #endif
3627    
3628          if (number > 0)          if (number > 0)
3629            {            {
# Line 3077  for (;;) Line 3641  for (;;)
3641        the group. */        the group. */
3642    
3643        ims = original_ims;        ims = original_ims;
3644        DPRINTF(("ims reset to %02x\n", ims));        DPRINTF(("ims reset to %02lx\n", ims));
3645    
3646        /* For a non-repeating ket, just continue at this level. This also        /* For a non-repeating ket, just continue at this level. This also
3647        happens for a repeating ket if no characters were matched in the group.        happens for a repeating ket if no characters were matched in the group.
# Line 3085  for (;;) Line 3649  for (;;)
3649        5.005. If there is an options reset, it will get obeyed in the normal        5.005. If there is an options reset, it will get obeyed in the normal
3650        course of events. */        course of events. */
3651    
3652        if (*ecode == OP_KET || eptr == eptrb)        if (*ecode == OP_KET || eptr == saved_eptr)
3653          {          {
3654          ecode += 3;          ecode += 3;
3655          break;          break;
# Line 3096  for (;;) Line 3660  for (;;)
3660    
3661        if (*ecode == OP_KETRMIN)        if (*ecode == OP_KETRMIN)
3662          {          {
3663          if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr) ||          if (match(eptr, ecode+3, offset_top, md, ims, eptrb, 0) ||
3664              match(eptr, prev, offset_top, md, ims, FALSE, eptr)) return TRUE;              match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup))
3665                  return TRUE;
3666          }          }
3667        else  /* OP_KETRMAX */        else  /* OP_KETRMAX */
3668          {          {
3669          if (match(eptr, prev, offset_top, md, ims, FALSE, eptr) ||          if (match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup) ||
3670              match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;              match(eptr, ecode+3, offset_top, md, ims, eptrb, 0)) return TRUE;
3671          }          }
3672        }        }
3673      return FALSE;      return FALSE;
# Line 3172  for (;;) Line 3737  for (;;)
3737      case OP_WORD_BOUNDARY:      case OP_WORD_BOUNDARY:
3738        {        {
3739        BOOL prev_is_word = (eptr != md->start_subject) &&        BOOL prev_is_word = (eptr != md->start_subject) &&
3740          ((pcre_ctypes[eptr[-1]] & ctype_word) != 0);          ((md->ctypes[eptr[-1]] & ctype_word) != 0);
3741        BOOL cur_is_word = (eptr < md->end_subject) &&        BOOL cur_is_word = (eptr < md->end_subject) &&
3742          ((pcre_ctypes[*eptr] & ctype_word) != 0);          ((md->ctypes[*eptr] & ctype_word) != 0);
3743        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
3744             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
3745          return FALSE;          return FALSE;
# Line 3191  for (;;) Line 3756  for (;;)
3756      break;      break;
3757    
3758      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
3759      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) != 0)      if (eptr >= md->end_subject ||
3760           (md->ctypes[*eptr++] & ctype_digit) != 0)
3761        return FALSE;        return FALSE;
3762      ecode++;      ecode++;
3763      break;      break;
3764    
3765      case OP_DIGIT:      case OP_DIGIT:
3766      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) == 0)      if (eptr >= md->end_subject ||
3767           (md->ctypes[*eptr++] & ctype_digit) == 0)
3768        return FALSE;        return FALSE;
3769      ecode++;      ecode++;
3770      break;      break;
3771    
3772      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
3773      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) != 0)      if (eptr >= md->end_subject ||
3774           (md->ctypes[*eptr++] & ctype_space) != 0)
3775        return FALSE;        return FALSE;
3776      ecode++;      ecode++;
3777      break;      break;
3778    
3779      case OP_WHITESPACE:      case OP_WHITESPACE:
3780      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) == 0)      if (eptr >= md->end_subject ||
3781           (md->ctypes[*eptr++] & ctype_space) == 0)
3782        return FALSE;        return FALSE;
3783      ecode++;      ecode++;
3784      break;      break;
3785    
3786      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
3787      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) != 0)      if (eptr >= md->end_subject ||
3788           (md->ctypes[*eptr++] & ctype_word) != 0)
3789        return FALSE;        return FALSE;
3790      ecode++;      ecode++;
3791      break;      break;
3792    
3793      case OP_WORDCHAR:      case OP_WORDCHAR:
3794      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) == 0)      if (eptr >= md->end_subject ||
3795           (md->ctypes[*eptr++] & ctype_word) == 0)
3796        return FALSE;        return FALSE;
3797      ecode++;      ecode++;
3798      break;      break;
# Line 3307  for (;;) Line 3878  for (;;)
3878          {          {
3879          for (i = min;; i++)          for (i = min;; i++)
3880            {            {
3881            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))            if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
3882              return TRUE;              return TRUE;
3883            if (i >= max || !match_ref(offset, eptr, length, md, ims))            if (i >= max || !match_ref(offset, eptr, length, md, ims))
3884              return FALSE;              return FALSE;
# Line 3328  for (;;) Line 3899  for (;;)
3899            }            }
3900          while (eptr >= pp)          while (eptr >= pp)
3901            {            {
3902            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))            if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
3903              return TRUE;              return TRUE;
3904            eptr -= length;            eptr -= length;
3905            }            }
# Line 3399  for (;;) Line 3970  for (;;)
3970          {          {
3971          for (i = min;; i++)          for (i = min;; i++)
3972            {            {
3973            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))            if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
3974              return TRUE;              return TRUE;
3975            if (i >= max || eptr >= md->end_subject) return FALSE;            if (i >= max || eptr >= md->end_subject) return FALSE;
3976            c = *eptr++;            c = *eptr++;
# Line 3423  for (;;) Line 3994  for (;;)
3994            }            }
3995    
3996          while (eptr >= pp)          while (eptr >= pp)
3997            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))            if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
3998              return TRUE;              return TRUE;
3999          return FALSE;          return FALSE;
4000          }          }
# Line 3453  for (;;) Line 4024  for (;;)
4024        if (length > md->end_subject - eptr) return FALSE;        if (length > md->end_subject - eptr) return FALSE;
4025        if ((ims & PCRE_CASELESS) != 0)        if ((ims & PCRE_CASELESS) != 0)
4026          {          {
4027          while (length-- > 0) if (pcre_lcc[*ecode++] != pcre_lcc[*eptr++]) return FALSE;          while (length-- > 0)
4028              if (md->lcc[*ecode++] != md->lcc[*eptr++])
4029                return FALSE;
4030          }          }
4031        else        else
4032          {          {
# Line 3510  for (;;) Line 4083  for (;;)
4083    
4084      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
4085        {        {
4086        c = pcre_lcc[c];        c = md->lcc[c];
4087        for (i = 1; i <= min; i++) if (c != pcre_lcc[*eptr++]) return FALSE;        for (i = 1; i <= min; i++)
4088            if (c != md->lcc[*eptr++]) return FALSE;
4089        if (min == max) continue;        if (min == max) continue;
4090        if (minimize)        if (minimize)
4091          {          {
4092          for (i = min;; i++)          for (i = min;; i++)
4093            {            {
4094            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))            if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
4095              return TRUE;              return TRUE;
4096            if (i >= max || eptr >= md->end_subject || c != pcre_lcc[*eptr++])            if (i >= max || eptr >= md->end_subject ||
4097                  c != md->lcc[*eptr++])
4098              return FALSE;              return FALSE;
4099            }            }
4100          /* Control never gets here */          /* Control never gets here */
# Line 3529  for (;;) Line 4104  for (;;)
4104          const uschar *pp = eptr;          const uschar *pp = eptr;
4105          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4106            {            {
4107            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c != md->lcc[*eptr]) break;
4108            eptr++;            eptr++;
4109            }            }
4110          while (eptr >= pp)          while (eptr >= pp)
4111            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))            if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
4112              return TRUE;              return TRUE;
4113          return FALSE;          return FALSE;
4114          }          }
# Line 3550  for (;;) Line 4125  for (;;)
4125          {          {
4126          for (i = min;; i++)          for (i = min;; i++)
4127            {            {
4128            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))            if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
4129              return TRUE;              return TRUE;
4130            if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;            if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;
4131            }            }
# Line 3565  for (;;) Line 4140  for (;;)
4140            eptr++;            eptr++;
4141            }            }
4142          while (eptr >= pp)          while (eptr >= pp)
4143           if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))           if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
4144             return TRUE;             return TRUE;
4145          return FALSE;          return FALSE;
4146          }          }
# Line 3579  for (;;) Line 4154  for (;;)
4154      ecode++;      ecode++;
4155      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
4156        {        {
4157        if (pcre_lcc[*ecode++] == pcre_lcc[*eptr++]) return FALSE;        if (md->lcc[*ecode++] == md->lcc[*eptr++]) return FALSE;
4158        }        }
4159      else      else
4160        {        {
# Line 3639  for (;;) Line 4214  for (;;)
4214    
4215      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
4216        {        {
4217        c = pcre_lcc[c];        c = md->lcc[c];
4218        for (i = 1; i <= min; i++) if (c == pcre_lcc[*eptr++]) return FALSE;        for (i = 1; i <= min; i++)
4219            if (c == md->lcc[*eptr++]) return FALSE;
4220        if (min == max) continue;        if (min == max) continue;
4221        if (minimize)        if (minimize)
4222          {          {
4223          for (i = min;; i++)          for (i = min;; i++)
4224            {            {
4225            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))            if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
4226              return TRUE;              return TRUE;
4227            if (i >= max || eptr >= md->end_subject || c == pcre_lcc[*eptr++])            if (i >= max || eptr >= md->end_subject ||
4228                  c == md->lcc[*eptr++])
4229              return FALSE;              return FALSE;
4230            }            }
4231          /* Control never gets here */          /* Control never gets here */
# Line 3658  for (;;) Line 4235  for (;;)
4235          const uschar *pp = eptr;          const uschar *pp = eptr;
4236          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4237            {            {
4238            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c == md->lcc[*eptr]) break;
4239            eptr++;            eptr++;
4240            }            }
4241          while (eptr >= pp)          while (eptr >= pp)
4242            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))            if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
4243              return TRUE;              return TRUE;
4244          return FALSE;          return FALSE;
4245          }          }
# Line 3679  for (;;) Line 4256  for (;;)
4256          {          {
4257          for (i = min;; i++)          for (i = min;; i++)
4258            {            {
4259            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))            if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
4260              return TRUE;              return TRUE;
4261            if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;            if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;
4262            }            }
# Line 3694  for (;;) Line 4271  for (;;)
4271            eptr++;            eptr++;
4272            }            }
4273          while (eptr >= pp)          while (eptr >= pp)
4274           if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))           if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
4275             return TRUE;             return TRUE;
4276          return FALSE;          return FALSE;
4277          }          }
# Line 3752  for (;;) Line 4329  for (;;)
4329    
4330        case OP_NOT_DIGIT:        case OP_NOT_DIGIT:
4331        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
4332          if ((pcre_ctypes[*eptr++] & ctype_digit) != 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_digit) != 0) return FALSE;
4333        break;        break;
4334    
4335        case OP_DIGIT:        case OP_DIGIT:
4336        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
4337          if ((pcre_ctypes[*eptr++] & ctype_digit) == 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_digit) == 0) return FALSE;
4338        break;        break;
4339    
4340        case OP_NOT_WHITESPACE:        case OP_NOT_WHITESPACE:
4341        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
4342          if ((pcre_ctypes[*eptr++] & ctype_space) != 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_space) != 0) return FALSE;
4343        break;        break;
4344    
4345        case OP_WHITESPACE:        case OP_WHITESPACE:
4346        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
4347          if ((pcre_ctypes[*eptr++] & ctype_space) == 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_space) == 0) return FALSE;
4348        break;        break;
4349    
4350        case OP_NOT_WORDCHAR:        case OP_NOT_WORDCHAR:
4351        for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) != 0)        for (i = 1; i <= min; i++)
4352          return FALSE;          if ((md->ctypes[*eptr++] & ctype_word) != 0)
4353              return FALSE;
4354        break;        break;
4355    
4356        case OP_WORDCHAR:        case OP_WORDCHAR:
4357        for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) == 0)        for (i = 1; i <= min; i++)
4358          return FALSE;          if ((md->ctypes[*eptr++] & ctype_word) == 0)
4359              return FALSE;
4360        break;        break;
4361        }        }
4362    
# Line 3786  for (;;) Line 4365  for (;;)
4365      if (min == max) continue;      if (min == max) continue;
4366    
4367      /* If minimizing, we have to test the rest of the pattern before each      /* If minimizing, we have to test the rest of the pattern before each
4368      subsequent match, so inlining isn't much help; just use the function. */      subsequent match. */
4369    
4370      if (minimize)      if (minimize)
4371        {        {
4372        for (i = min;; i++)        for (i = min;; i++)
4373          {          {
4374          if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE;          if (match(eptr, ecode, offset_top, md, ims, eptrb, 0)) return TRUE;
4375          if (i >= max || eptr >= md->end_subject ||          if (i >= max || eptr >= md->end_subject) return FALSE;
4376            !match_type(ctype, *eptr++, (ims & PCRE_DOTALL) != 0))  
4377              return FALSE;          c = *eptr++;
4378            switch(ctype)
4379              {
4380              case OP_ANY:
4381              if ((ims & PCRE_DOTALL) == 0 && c == '\n') return FALSE;
4382              break;
4383    
4384              case OP_NOT_DIGIT:
4385              if ((md->ctypes[c] & ctype_digit) != 0) return FALSE;
4386              break;
4387    
4388              case OP_DIGIT:
4389              if ((md->ctypes[c] & ctype_digit) == 0) return FALSE;
4390              break;
4391    
4392              case OP_NOT_WHITESPACE:
4393              if ((md->ctypes[c] & ctype_space) != 0) return FALSE;
4394              break;
4395    
4396              case OP_WHITESPACE:
4397              if  ((md->ctypes[c] & ctype_space) == 0) return FALSE;
4398              break;
4399    
4400              case OP_NOT_WORDCHAR:
4401              if ((md->ctypes[c] & ctype_word) != 0) return FALSE;
4402              break;
4403    
4404              case OP_WORDCHAR:
4405              if ((md->ctypes[c] & ctype_word) == 0) return FALSE;
4406              break;
4407              }
4408          }          }
4409        /* Control never gets here */        /* Control never gets here */
4410        }        }
# Line 3828  for (;;) Line 4437  for (;;)
4437          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
4438          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4439            {            {
4440            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
4441              break;              break;
4442            eptr++;            eptr++;
4443            }            }
# Line 3837  for (;;) Line 4446  for (;;)
4446          case OP_DIGIT:          case OP_DIGIT:
4447          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4448            {            {
4449            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
4450              break;              break;
4451            eptr++;            eptr++;
4452            }            }
# Line 3846  for (;;) Line 4455  for (;;)
4455          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
4456          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4457            {            {
4458            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
4459              break;              break;
4460            eptr++;            eptr++;
4461            }            }
# Line 3855  for (;;) Line 4464  for (;;)
4464          case OP_WHITESPACE:          case OP_WHITESPACE:
4465          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4466            {            {
4467            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
4468              break;              break;
4469            eptr++;            eptr++;
4470            }            }
# Line 3864  for (;;) Line 4473  for (;;)
4473          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
4474          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4475            {            {
4476            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
4477              break;              break;
4478            eptr++;            eptr++;
4479            }            }
# Line 3873  for (;;) Line 4482  for (;;)
4482          case OP_WORDCHAR:          case OP_WORDCHAR:
4483          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4484            {            {
4485            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
4486              break;              break;
4487            eptr++;            eptr++;
4488            }            }
# Line 3881  for (;;) Line 4490  for (;;)
4490          }          }
4491    
4492        while (eptr >= pp)        while (eptr >= pp)
4493          if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))          if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
4494            return TRUE;            return TRUE;
4495        return FALSE;        return FALSE;
4496        }        }
# Line 3919  Arguments: Line 4528  Arguments:
4528    external_extra  points to "hints" from pcre_study() or is NULL    external_extra  points to "hints" from pcre_study() or is NULL
4529    subject         points to the subject string    subject         points to the subject string
4530    length          length of subject string (may contain binary zeros)    length          length of subject string (may contain binary zeros)
4531      start_offset    where to start in the subject string
4532    options         option bits    options         option bits
4533    offsets         points to a vector of ints to be filled in with offsets    offsets         points to a vector of ints to be filled in with offsets
4534    offsetcount     the number of elements in the vector    offsetcount     the number of elements in the vector
# Line 3931  Returns:          > 0 => success; value Line 4541  Returns:          > 0 => success; value
4541    
4542  int  int
4543  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
4544    const char *subject, int length, int options, int *offsets, int offsetcount)    const char *subject, int length, int start_offset, int options, int *offsets,
4545      int offsetcount)
4546  {  {
4547  int resetcount, ocount;  int resetcount, ocount;
4548  int first_char = -1;  int first_char = -1;
4549  int ims = 0;  int req_char = -1;
4550    int req_char2 = -1;
4551    unsigned long int ims = 0;
4552  match_data match_block;  match_data match_block;
4553  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
4554  const uschar *start_match = (const uschar *)subject;  const uschar *start_match = (const uschar *)subject + start_offset;
4555  const uschar *end_subject;  const uschar *end_subject;
4556    const uschar *req_char_ptr = start_match - 1;
4557  const real_pcre *re = (const real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
4558  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
4559  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
# Line 3952  if (re == NULL || subject == NULL || Line 4566  if (re == NULL || subject == NULL ||
4566     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
4567  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
4568    
4569    match_block.start_pattern = re->code;
4570  match_block.start_subject = (const uschar *)subject;  match_block.start_subject = (const uschar *)subject;
4571  match_block.end_subject = match_block.start_subject + length;  match_block.end_subject = match_block.start_subject + length;
4572  end_subject = match_block.end_subject;  end_subject = match_block.end_subject;
# Line 3960  match_block.endonly = (re->options & PCR Line 4575  match_block.endonly = (re->options & PCR
4575    
4576  match_block.notbol = (options & PCRE_NOTBOL) != 0;  match_block.notbol = (options & PCRE_NOTBOL) != 0;
4577  match_block.noteol = (options & PCRE_NOTEOL) != 0;  match_block.noteol = (options & PCRE_NOTEOL) != 0;
4578    match_block.notempty = (options & PCRE_NOTEMPTY) != 0;
4579    
4580  match_block.errorcode = PCRE_ERROR_NOMATCH;     /* Default error */  match_block.errorcode = PCRE_ERROR_NOMATCH;     /* Default error */
4581    
4582    match_block.lcc = re->tables + lcc_offset;
4583    match_block.ctypes = re->tables + ctypes_offset;
4584    
4585  /* The ims options can vary during the matching as a result of the presence  /* The ims options can vary during the matching as a result of the presence
4586  of (?ims) items in the pattern. They are kept in a local variable so that  of (?ims) items in the pattern. They are kept in a local variable so that
4587  restoring at the exit of a group is easy. */  restoring at the exit of a group is easy. */
# Line 3997  in the pattern. */ Line 4616  in the pattern. */
4616  resetcount = 2 + re->top_bracket * 2;  resetcount = 2 + re->top_bracket * 2;
4617  if (resetcount > offsetcount) resetcount = ocount;  if (resetcount > offsetcount) resetcount = ocount;
4618    
4619    /* Reset the working variable associated with each extraction. These should
4620    never be used unless previously set, but they get saved and restored, and so we
4621    initialize them to avoid reading uninitialized locations. */
4622    
4623    if (match_block.offset_vector != NULL)
4624      {
4625      register int *iptr = match_block.offset_vector + ocount;
4626      register int *iend = iptr - resetcount/2 + 1;
4627      while (--iptr >= iend) *iptr = -1;
4628      }
4629    
4630  /* Set up the first character to match, if available. The first_char value is  /* Set up the first character to match, if available. The first_char value is
4631  never set for an anchored regular expression, but the anchoring may be forced  never set for an anchored regular expression, but the anchoring may be forced
4632  at run time, so we have to test for anchoring. The first char may be unset for  at run time, so we have to test for anchoring. The first char may be unset for
# Line 4008  if (!anchored) Line 4638  if (!anchored)
4638    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->options & PCRE_FIRSTSET) != 0)
4639      {      {
4640      first_char = re->first_char;      first_char = re->first_char;
4641      if ((ims & PCRE_CASELESS) != 0) first_char = pcre_lcc[first_char];      if ((ims & PCRE_CASELESS) != 0) first_char = match_block.lcc[first_char];
4642      }      }
4643    else    else
4644      if (!startline && extra != NULL &&      if (!startline && extra != NULL &&
# Line 4016  if (!anchored) Line 4646  if (!anchored)
4646          start_bits = extra->start_bits;          start_bits = extra->start_bits;
4647    }    }
4648    
4649  /* Loop for unanchored matches; for anchored regexps the loop runs just once. */  /* For anchored or unanchored matches, there may be a "last known required
4650    character" set. If the PCRE_CASELESS is set, implying that the match starts
4651    caselessly, or if there are any changes of this flag within the regex, set up
4652    both cases of the character. Otherwise set the two values the same, which will
4653    avoid duplicate testing (which takes significant time). This covers the vast
4654    majority of cases. It will be suboptimal when the case flag changes in a regex
4655    and the required character in fact is caseful. */
4656    
4657    if ((re->options & PCRE_REQCHSET) != 0)
4658      {
4659      req_char = re->req_char;
4660      req_char2 = ((re->options & (PCRE_CASELESS | PCRE_ICHANGED)) != 0)?
4661        (re->tables + fcc_offset)[req_char] : req_char;
4662      }
4663    
4664    /* Loop for handling unanchored repeated matching attempts; for anchored regexes
4665    the loop runs just once. */
4666    
4667  do  do
4668    {    {
# Line 4033  do Line 4679  do
4679    if (first_char >= 0)    if (first_char >= 0)
4680      {      {
4681      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
4682        while (start_match < end_subject && pcre_lcc[*start_match] != first_char)        while (start_match < end_subject &&
4683                 match_block.lcc[*start_match] != first_char)
4684          start_match++;          start_match++;
4685      else      else
4686        while (start_match < end_subject && *start_match != first_char)        while (start_match < end_subject && *start_match != first_char)
# Line 4044  do Line 4691  do
4691    
4692    else if (startline)    else if (startline)
4693      {      {
4694      if (start_match > match_block.start_subject)      if (start_match > match_block.start_subject + start_offset)
4695        {        {
4696        while (start_match < end_subject && start_match[-1] != '\n')        while (start_match < end_subject && start_match[-1] != '\n')
4697          start_match++;          start_match++;
4698        }        }
4699      }      }
4700    
4701    /* Or to a non-unique first char */    /* Or to a non-unique first char after study */
4702    
4703    else if (start_bits != NULL)    else if (start_bits != NULL)
4704      {      {
# Line 4068  do Line 4715  do
4715    printf("\n");    printf("\n");
4716  #endif  #endif
4717    
4718      /* If req_char is set, we know that that character must appear in the subject
4719      for the match to succeed. If the first character is set, req_char must be
4720      later in the subject; otherwise the test starts at the match point. This
4721      optimization can save a huge amount of backtracking in patterns with nested
4722      unlimited repeats that aren't going to match. We don't know what the state of
4723      case matching may be when this character is hit, so test for it in both its
4724      cases if necessary. However, the different cased versions will not be set up
4725      unless PCRE_CASELESS was given or the casing state changes within the regex.
4726      Writing separate code makes it go faster, as does using an autoincrement and
4727      backing off on a match. */
4728    
4729      if (req_char >= 0)
4730        {
4731        register const uschar *p = start_match + ((first_char >= 0)? 1 : 0);
4732    
4733        /* We don't need to repeat the search if we haven't yet reached the
4734        place we found it at last time. */
4735    
4736        if (p > req_char_ptr)
4737          {
4738          /* Do a single test if no case difference is set up */
4739    
4740          if (req_char == req_char2)
4741            {
4742            while (p < end_subject)
4743              {
4744              if (*p++ == req_char) { p--; break; }
4745              }
4746            }
4747    
4748          /* Otherwise test for either case */
4749    
4750          else
4751            {
4752            while (p < end_subject)
4753              {
4754              register int pp = *p++;
4755              if (pp == req_char || pp == req_char2) { p--; break; }
4756              }
4757            }
4758    
4759          /* If we can't find the required character, break the matching loop */
4760    
4761          if (p >= end_subject) break;
4762    
4763          /* If we have found the required character, save the point where we
4764          found it, so that we don't search again next time round the loop if
4765          the start hasn't passed this character yet. */
4766    
4767          req_char_ptr = p;
4768          }
4769        }
4770    
4771    /* When a match occurs, substrings will be set for all internal extractions;    /* When a match occurs, substrings will be set for all internal extractions;
4772    we just need to set up the whole thing as substring 0 before returning. If    we just need to set up the whole thing as substring 0 before returning. If
4773    there were too many extractions, set the return code to zero. In the case    there were too many extractions, set the return code to zero. In the case
# Line 4075  do Line 4775  do
4775    those back references that we can. In this case there need not be overflow    those back references that we can. In this case there need not be overflow
4776    if certain parts of the pattern were not used. */    if certain parts of the pattern were not used. */
4777    
4778    if (!match(start_match, re->code, 2, &match_block, ims, FALSE, start_match))    match_block.start_match = start_match;
4779      if (!match(start_match, re->code, 2, &match_block, ims, NULL, match_isgroup))
4780      continue;      continue;
4781    
4782    /* Copy the offset information from temporary store if necessary */    /* Copy the offset information from temporary store if necessary */
# Line 4106  do Line 4807  do
4807    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
4808    return rc;    return rc;
4809    }    }
4810    
4811    /* This "while" is the end of the "do" above */
4812    
4813  while (!anchored &&  while (!anchored &&
4814         match_block.errorcode == PCRE_ERROR_NOMATCH &&         match_block.errorcode == PCRE_ERROR_NOMATCH &&
4815         start_match++ < end_subject);         start_match++ < end_subject);

Legend:
Removed from v.23  
changed lines
  Added in v.47

  ViewVC Help
Powered by ViewVC 1.1.5