/[pcre]/code/tags/pcre-3.2/pcre.c
ViewVC logotype

Diff of /code/tags/pcre-3.2/pcre.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 23 by nigel, Sat Feb 24 21:38:41 2007 UTC revision 43 by nigel, Sat Feb 24 21:39:21 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1998 University of Cambridge             Copyright (c) 1997-2000 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 25  restrictions: Line 25  restrictions:
25    
26  3. Altered versions must be plainly marked as such, and must not be  3. Altered versions must be plainly marked as such, and must not be
27     misrepresented as being the original software.     misrepresented as being the original software.
28    
29    4. If PCRE is embedded in any software that is released under the GNU
30       General Purpose Licence (GPL), then the terms of that licence shall
31       supersede any condition above with which it is incompatible.
32  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
33  */  */
34    
# Line 78  static const char *OP_names[] = { Line 82  static const char *OP_names[] = {
82    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
83    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
84    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
85    "class", "Ref",    "class", "Ref", "Recurse",
86    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
87    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
88    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
# Line 103  static const short int escapes[] = { Line 107  static const short int escapes[] = {
107      0,      0, -ESC_z                                            /* x - z */      0,      0, -ESC_z                                            /* x - z */
108  };  };
109    
110    /* Tables of names of POSIX character classes and their lengths. The list is
111    terminated by a zero length entry. The first three must be alpha, lower, upper,
112    as this is assumed for handling case independence. */
113    
114    static const char *posix_names[] = {
115      "alpha", "lower", "upper",
116      "alnum", "ascii", "cntrl", "digit", "graph",
117      "print", "punct", "space", "word",  "xdigit" };
118    
119    static const uschar posix_name_lengths[] = {
120      5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
121    
122    /* Table of class bit maps for each POSIX class; up to three may be combined
123    to form the class. */
124    
125    static const int posix_class_maps[] = {
126      cbit_lower, cbit_upper, -1,             /* alpha */
127      cbit_lower, -1,         -1,             /* lower */
128      cbit_upper, -1,         -1,             /* upper */
129      cbit_digit, cbit_lower, cbit_upper,     /* alnum */
130      cbit_print, cbit_cntrl, -1,             /* ascii */
131      cbit_cntrl, -1,         -1,             /* cntrl */
132      cbit_digit, -1,         -1,             /* digit */
133      cbit_graph, -1,         -1,             /* graph */
134      cbit_print, -1,         -1,             /* print */
135      cbit_punct, -1,         -1,             /* punct */
136      cbit_space, -1,         -1,             /* space */
137      cbit_word,  -1,         -1,             /* word */
138      cbit_xdigit,-1,         -1              /* xdigit */
139    };
140    
141    
142  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
143    
144  static BOOL  static BOOL
145    compile_regex(int, int, int *, uschar **, const uschar **, const char **,    compile_regex(int, int, int *, uschar **, const uschar **, const char **,
146      BOOL, int);      BOOL, int, int *, int *, compile_data *);
   
 /* Structure for passing "static" information around between the functions  
 doing the matching, so that they are thread-safe. */  
   
 typedef struct match_data {  
   int    errorcode;             /* As it says */  
   int   *offset_vector;         /* Offset vector */  
   int    offset_end;            /* One past the end */  
   int    offset_max;            /* The maximum usable for return data */  
   BOOL   offset_overflow;       /* Set if too many extractions */  
   BOOL   notbol;                /* NOTBOL flag */  
   BOOL   noteol;                /* NOTEOL flag */  
   BOOL   endonly;               /* Dollar not before final \n */  
   const uschar *start_subject;  /* Start of the subject string */  
   const uschar *end_subject;    /* End of the subject string */  
   const uschar *end_match_ptr;  /* Subject position at end match */  
   int     end_offset_top;       /* Highwater mark at end of match */  
 } match_data;  
147    
148    
149    
# Line 145  void  (*pcre_free)(void *) = free; Line 163  void  (*pcre_free)(void *) = free;
163    
164    
165  /*************************************************  /*************************************************
166    *             Default character tables           *
167    *************************************************/
168    
169    /* A default set of character tables is included in the PCRE binary. Its source
170    is built by the maketables auxiliary program, which uses the default C ctypes
171    functions, and put in the file chartables.c. These tables are used by PCRE
172    whenever the caller of pcre_compile() does not provide an alternate set of
173    tables. */
174    
175    #include "chartables.c"
176    
177    
178    
179    /*************************************************
180  *          Return version string                 *  *          Return version string                 *
181  *************************************************/  *************************************************/
182    
183    #define STRING(a)  # a
184    #define XSTRING(s) STRING(s)
185    
186  const char *  const char *
187  pcre_version(void)  pcre_version(void)
188  {  {
189  return PCRE_VERSION;  return XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE);
190  }  }
191    
192    
193    
194    
195  /*************************************************  /*************************************************
196  *       Return info about a compiled pattern     *  * (Obsolete) Return info about compiled pattern  *
197  *************************************************/  *************************************************/
198    
199  /* This function picks potentially useful data out of the private  /* This is the original "info" function. It picks potentially useful data out
200  structure.  of the private structure, but its interface was too rigid. It remains for
201    backwards compatibility. The public options are passed back in an int - though
202    the re->options field has been expanded to a long int, all the public options
203    are at the low end of it, and so even on 16-bit systems this will still be OK.
204    Therefore, I haven't changed the API for pcre_info().
205    
206  Arguments:  Arguments:
207    external_re   points to compiled code    external_re   points to compiled code
# Line 171  Arguments: Line 210  Arguments:
210                  or -1 if multiline and all branches start ^,                  or -1 if multiline and all branches start ^,
211                  or -2 otherwise                  or -2 otherwise
212    
213  Returns:        number of identifying extraction brackets  Returns:        number of capturing subpatterns
214                  or negative values on error                  or negative values on error
215  */  */
216    
# Line 181  pcre_info(const pcre *external_re, int * Line 220  pcre_info(const pcre *external_re, int *
220  const real_pcre *re = (const real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
221  if (re == NULL) return PCRE_ERROR_NULL;  if (re == NULL) return PCRE_ERROR_NULL;
222  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
223  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);  if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);
224  if (first_char != NULL)  if (first_char != NULL)
225    *first_char = ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :    *first_char = ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :
226       ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;       ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
# Line 190  return re->top_bracket; Line 229  return re->top_bracket;
229    
230    
231    
232    /*************************************************
233    *        Return info about compiled pattern      *
234    *************************************************/
235    
236    /* This is a newer "info" function which has an extensible interface so
237    that additional items can be added compatibly.
238    
239    Arguments:
240      external_re      points to compiled code
241      external_study   points to study data, or NULL
242      what             what information is required
243      where            where to put the information
244    
245    Returns:           0 if data returned, negative on error
246    */
247    
248    int
249    pcre_fullinfo(const pcre *external_re, const pcre_extra *study_data, int what,
250      void *where)
251    {
252    const real_pcre *re = (const real_pcre *)external_re;
253    const real_pcre_extra *study = (const real_pcre_extra *)study_data;
254    
255    if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
256    if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
257    
258    switch (what)
259      {
260      case PCRE_INFO_OPTIONS:
261      *((unsigned long int *)where) = re->options & PUBLIC_OPTIONS;
262      break;
263    
264      case PCRE_INFO_SIZE:
265      *((size_t *)where) = re->size;
266      break;
267    
268      case PCRE_INFO_CAPTURECOUNT:
269      *((int *)where) = re->top_bracket;
270      break;
271    
272      case PCRE_INFO_BACKREFMAX:
273      *((int *)where) = re->top_backref;
274      break;
275    
276      case PCRE_INFO_FIRSTCHAR:
277      *((int *)where) =
278        ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :
279        ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
280      break;
281    
282      case PCRE_INFO_FIRSTTABLE:
283      *((const uschar **)where) =
284        (study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)?
285          study->start_bits : NULL;
286      break;
287    
288      case PCRE_INFO_LASTLITERAL:
289      *((int *)where) =
290        ((re->options & PCRE_REQCHSET) != 0)? re->req_char : -1;
291      break;
292    
293      default: return PCRE_ERROR_BADOPTION;
294      }
295    
296    return 0;
297    }
298    
299    
300    
301  #ifdef DEBUG  #ifdef DEBUG
302  /*************************************************  /*************************************************
# Line 237  Arguments: Line 344  Arguments:
344    bracount   number of previous extracting brackets    bracount   number of previous extracting brackets
345    options    the options bits    options    the options bits
346    isclass    TRUE if inside a character class    isclass    TRUE if inside a character class
347      cd         pointer to char tables block
348    
349  Returns:     zero or positive => a data character  Returns:     zero or positive => a data character
350               negative => a special escape sequence               negative => a special escape sequence
# Line 245  Returns:     zero or positive => a data Line 353  Returns:     zero or positive => a data
353    
354  static int  static int
355  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
356    int options, BOOL isclass)    int options, BOOL isclass, compile_data *cd)
357  {  {
358  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
359  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */  int c, i;
 int i;  
360    
361    c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */
362  if (c == 0) *errorptr = ERR1;  if (c == 0) *errorptr = ERR1;
363    
364  /* Digits or letters may have special meaning; all others are literals. */  /* Digits or letters may have special meaning; all others are literals. */
# Line 288  else Line 396  else
396        {        {
397        oldptr = ptr;        oldptr = ptr;
398        c -= '0';        c -= '0';
399        while ((pcre_ctypes[ptr[1]] & ctype_digit) != 0)        while ((cd->ctypes[ptr[1]] & ctype_digit) != 0)
400          c = c * 10 + *(++ptr) - '0';          c = c * 10 + *(++ptr) - '0';
401        if (c < 10 || c <= bracount)        if (c < 10 || c <= bracount)
402          {          {
# Line 314  else Line 422  else
422    
423      case '0':      case '0':
424      c -= '0';      c -= '0';
425      while(i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_digit) != 0 &&      while(i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 &&
426        ptr[1] != '8' && ptr[1] != '9')        ptr[1] != '8' && ptr[1] != '9')
427          c = c * 8 + *(++ptr) - '0';          c = c * 8 + *(++ptr) - '0';
428      break;      break;
# Line 323  else Line 431  else
431    
432      case 'x':      case 'x':
433      c = 0;      c = 0;
434      while (i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_xdigit) != 0)      while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0)
435        {        {
436        ptr++;        ptr++;
437        c = c * 16 + pcre_lcc[*ptr] -        c = c * 16 + cd->lcc[*ptr] -
438          (((pcre_ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');          (((cd->ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');
439        }        }
440      break;      break;
441    
# Line 341  else Line 449  else
449    
450      /* A letter is upper-cased; then the 0x40 bit is flipped */      /* A letter is upper-cased; then the 0x40 bit is flipped */
451    
452      if (c >= 'a' && c <= 'z') c = pcre_fcc[c];      if (c >= 'a' && c <= 'z') c = cd->fcc[c];
453      c ^= 0x40;      c ^= 0x40;
454      break;      break;
455    
456      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
457      other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,      other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,
458      for Perl compatibility, it is a literal. */      for Perl compatibility, it is a literal. This code looks a bit odd, but
459        there used to be some cases other than the default, and there may be again
460        in future, so I haven't "optimized" it. */
461    
462      default:      default:
463      if ((options & PCRE_EXTRA) != 0) switch(c)      if ((options & PCRE_EXTRA) != 0) switch(c)
# Line 377  where the ddds are digits. Line 487  where the ddds are digits.
487    
488  Arguments:  Arguments:
489    p         pointer to the first char after '{'    p         pointer to the first char after '{'
490      cd        pointer to char tables block
491    
492  Returns:    TRUE or FALSE  Returns:    TRUE or FALSE
493  */  */
494    
495  static BOOL  static BOOL
496  is_counted_repeat(const uschar *p)  is_counted_repeat(const uschar *p, compile_data *cd)
497  {  {
498  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
499  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
500  if (*p == '}') return TRUE;  if (*p == '}') return TRUE;
501    
502  if (*p++ != ',') return FALSE;  if (*p++ != ',') return FALSE;
503  if (*p == '}') return TRUE;  if (*p == '}') return TRUE;
504    
505  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
506  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
507  return (*p == '}');  return (*p == '}');
508  }  }
509    
# Line 412  Arguments: Line 523  Arguments:
523    maxp       pointer to int for max    maxp       pointer to int for max
524               returned as -1 if no max               returned as -1 if no max
525    errorptr   points to pointer to error message    errorptr   points to pointer to error message
526      cd         pointer to character tables block
527    
528  Returns:     pointer to '}' on success;  Returns:     pointer to '}' on success;
529               current ptr on error, with errorptr set               current ptr on error, with errorptr set
530  */  */
531    
532  static const uschar *  static const uschar *
533  read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr)  read_repeat_counts(const uschar *p, int *minp, int *maxp,
534      const char **errorptr, compile_data *cd)
535  {  {
536  int min = 0;  int min = 0;
537  int max = -1;  int max = -1;
538    
539  while ((pcre_ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';  while ((cd->ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';
540    
541  if (*p == '}') max = min; else  if (*p == '}') max = min; else
542    {    {
543    if (*(++p) != '}')    if (*(++p) != '}')
544      {      {
545      max = 0;      max = 0;
546      while((pcre_ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';      while((cd->ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';
547      if (max < min)      if (max < min)
548        {        {
549        *errorptr = ERR4;        *errorptr = ERR4;
# Line 526  for (;;) Line 639  for (;;)
639    
640      case OP_REVERSE:      case OP_REVERSE:
641      cc++;      cc++;
642        /* Fall through */
643    
644      case OP_CREF:      case OP_CREF:
645      case OP_OPT:      case OP_OPT:
# Line 609  for (;;) Line 723  for (;;)
723    
724    
725  /*************************************************  /*************************************************
726    *           Check for POSIX class syntax         *
727    *************************************************/
728    
729    /* This function is called when the sequence "[:" or "[." or "[=" is
730    encountered in a character class. It checks whether this is followed by an
731    optional ^ and then a sequence of letters, terminated by a matching ":]" or
732    ".]" or "=]".
733    
734    Arguments:
735      ptr      pointer to the initial [
736      endptr   where to return the end pointer
737      cd       pointer to compile data
738    
739    Returns:   TRUE or FALSE
740    */
741    
742    static BOOL
743    check_posix_syntax(const uschar *ptr, const uschar **endptr, compile_data *cd)
744    {
745    int terminator;          /* Don't combine these lines; the Solaris cc */
746    terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
747    if (*(++ptr) == '^') ptr++;
748    while ((cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;
749    if (*ptr == terminator && ptr[1] == ']')
750      {
751      *endptr = ptr;
752      return TRUE;
753      }
754    return FALSE;
755    }
756    
757    
758    
759    
760    /*************************************************
761    *          Check POSIX class name                *
762    *************************************************/
763    
764    /* This function is called to check the name given in a POSIX-style class entry
765    such as [:alnum:].
766    
767    Arguments:
768      ptr        points to the first letter
769      len        the length of the name
770    
771    Returns:     a value representing the name, or -1 if unknown
772    */
773    
774    static int
775    check_posix_name(const uschar *ptr, int len)
776    {
777    register int yield = 0;
778    while (posix_name_lengths[yield] != 0)
779      {
780      if (len == posix_name_lengths[yield] &&
781        strncmp((const char *)ptr, posix_names[yield], len) == 0) return yield;
782      yield++;
783      }
784    return -1;
785    }
786    
787    
788    
789    
790    /*************************************************
791  *           Compile one branch                   *  *           Compile one branch                   *
792  *************************************************/  *************************************************/
793    
794  /* Scan the pattern, compiling it into the code vector.  /* Scan the pattern, compiling it into the code vector.
795    
796  Arguments:  Arguments:
797    options     the option bits    options      the option bits
798    brackets    points to number of brackets used    brackets     points to number of brackets used
799    code        points to the pointer to the current code point    code         points to the pointer to the current code point
800    ptrptr      points to the current pattern pointer    ptrptr       points to the current pattern pointer
801    errorptr    points to pointer to error message    errorptr     points to pointer to error message
802    optchanged  set to the value of the last OP_OPT item compiled    optchanged   set to the value of the last OP_OPT item compiled
803      reqchar      set to the last literal character required, else -1
804      countlits    set to count of mandatory literal characters
805      cd           contains pointers to tables
806    
807  Returns:      TRUE on success  Returns:       TRUE on success
808                FALSE, with *errorptr set on error                 FALSE, with *errorptr set on error
809  */  */
810    
811  static BOOL  static BOOL
812  compile_branch(int options, int *brackets, uschar **codeptr,  compile_branch(int options, int *brackets, uschar **codeptr,
813    const uschar **ptrptr, const char **errorptr, int *optchanged)    const uschar **ptrptr, const char **errorptr, int *optchanged,
814      int *reqchar, int *countlits, compile_data *cd)
815  {  {
816  int repeat_type, op_type;  int repeat_type, op_type;
817  int repeat_min, repeat_max;  int repeat_min, repeat_max;
818  int bravalue, length;  int bravalue, length;
819  int greedy_default, greedy_non_default;  int greedy_default, greedy_non_default;
820    int prevreqchar;
821    int condcount = 0;
822    int subcountlits = 0;
823  register int c;  register int c;
824  register uschar *code = *codeptr;  register uschar *code = *codeptr;
825  uschar *tempcode;  uschar *tempcode;
# Line 647  uschar class[32]; Line 833  uschar class[32];
833  greedy_default = ((options & PCRE_UNGREEDY) != 0);  greedy_default = ((options & PCRE_UNGREEDY) != 0);
834  greedy_non_default = greedy_default ^ 1;  greedy_non_default = greedy_default ^ 1;
835    
836    /* Initialize no required char, and count of literals */
837    
838    *reqchar = prevreqchar = -1;
839    *countlits = 0;
840    
841  /* Switch on next character until the end of the branch */  /* Switch on next character until the end of the branch */
842    
843  for (;; ptr++)  for (;; ptr++)
# Line 656  for (;; ptr++) Line 847  for (;; ptr++)
847    int class_lastchar;    int class_lastchar;
848    int newoptions;    int newoptions;
849    int condref;    int condref;
850      int subreqchar;
851    
852    c = *ptr;    c = *ptr;
853    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
854      {      {
855      if ((pcre_ctypes[c] & ctype_space) != 0) continue;      if ((cd->ctypes[c] & ctype_space) != 0) continue;
856      if (c == '#')      if (c == '#')
857        {        {
858        while ((c = *(++ptr)) != 0 && c != '\n');        while ((c = *(++ptr)) != 0 && c != '\n');
# Line 738  for (;; ptr++) Line 930  for (;; ptr++)
930          goto FAILED;          goto FAILED;
931          }          }
932    
933          /* Handle POSIX class names. Perl allows a negation extension of the
934          form [:^name:]. A square bracket that doesn't match the syntax is
935          treated as a literal. We also recognize the POSIX constructions
936          [.ch.] and [=ch=] ("collating elements") and fault them, as Perl
937          5.6 does. */
938    
939          if (c == '[' &&
940              (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
941              check_posix_syntax(ptr, &tempptr, cd))
942            {
943            BOOL local_negate = FALSE;
944            int posix_class, i;
945            register const uschar *cbits = cd->cbits;
946    
947            if (ptr[1] != ':')
948              {
949              *errorptr = ERR31;
950              goto FAILED;
951              }
952    
953            ptr += 2;
954            if (*ptr == '^')
955              {
956              local_negate = TRUE;
957              ptr++;
958              }
959    
960            posix_class = check_posix_name(ptr, tempptr - ptr);
961            if (posix_class < 0)
962              {
963              *errorptr = ERR30;
964              goto FAILED;
965              }
966    
967            /* If matching is caseless, upper and lower are converted to
968            alpha. This relies on the fact that the class table starts with
969            alpha, lower, upper as the first 3 entries. */
970    
971            if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)
972              posix_class = 0;
973    
974            /* Or into the map we are building up to 3 of the static class
975            tables, or their negations. */
976    
977            posix_class *= 3;
978            for (i = 0; i < 3; i++)
979              {
980              int taboffset = posix_class_maps[posix_class + i];
981              if (taboffset < 0) break;
982              if (local_negate)
983                for (c = 0; c < 32; c++) class[c] |= ~cbits[c+taboffset];
984              else
985                for (c = 0; c < 32; c++) class[c] |= cbits[c+taboffset];
986              }
987    
988            ptr = tempptr + 1;
989            class_charcount = 10;  /* Set > 1; assumes more than 1 per class */
990            continue;
991            }
992    
993        /* Backslash may introduce a single character, or it may introduce one        /* Backslash may introduce a single character, or it may introduce one
994        of the specials, which just set a flag. Escaped items are checked for        of the specials, which just set a flag. Escaped items are checked for
995        validity in the pre-compiling pass. The sequence \b is a special case.        validity in the pre-compiling pass. The sequence \b is a special case.
# Line 748  for (;; ptr++) Line 1000  for (;; ptr++)
1000    
1001        if (c == '\\')        if (c == '\\')
1002          {          {
1003          c = check_escape(&ptr, errorptr, *brackets, options, TRUE);          c = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
1004          if (-c == ESC_b) c = '\b';          if (-c == ESC_b) c = '\b';
1005          else if (c < 0)          else if (c < 0)
1006            {            {
1007              register const uschar *cbits = cd->cbits;
1008            class_charcount = 10;            class_charcount = 10;
1009            switch (-c)            switch (-c)
1010              {              {
1011              case ESC_d:              case ESC_d:
1012              for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_digit];              for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_digit];
1013              continue;              continue;
1014    
1015              case ESC_D:              case ESC_D:
1016              for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_digit];              for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_digit];
1017              continue;              continue;
1018    
1019              case ESC_w:              case ESC_w:
1020              for (c = 0; c < 32; c++)              for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_word];
               class[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]);  
1021              continue;              continue;
1022    
1023              case ESC_W:              case ESC_W:
1024              for (c = 0; c < 32; c++)              for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_word];
               class[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]);  
1025              continue;              continue;
1026    
1027              case ESC_s:              case ESC_s:
1028              for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_space];              for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_space];
1029              continue;              continue;
1030    
1031              case ESC_S:              case ESC_S:
1032              for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_space];              for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_space];
1033              continue;              continue;
1034    
1035              default:              default:
# Line 810  for (;; ptr++) Line 1061  for (;; ptr++)
1061    
1062          if (d == '\\')          if (d == '\\')
1063            {            {
1064            d = check_escape(&ptr, errorptr, *brackets, options, TRUE);            d = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
1065            if (d < 0)            if (d < 0)
1066              {              {
1067              if (d == -ESC_b) d = '\b'; else              if (d == -ESC_b) d = '\b'; else
# Line 832  for (;; ptr++) Line 1083  for (;; ptr++)
1083            class[c/8] |= (1 << (c&7));            class[c/8] |= (1 << (c&7));
1084            if ((options & PCRE_CASELESS) != 0)            if ((options & PCRE_CASELESS) != 0)
1085              {              {
1086              int uc = pcre_fcc[c];           /* flip case */              int uc = cd->fcc[c];           /* flip case */
1087              class[uc/8] |= (1 << (uc&7));              class[uc/8] |= (1 << (uc&7));
1088              }              }
1089            class_charcount++;                /* in case a one-char range */            class_charcount++;                /* in case a one-char range */
# Line 847  for (;; ptr++) Line 1098  for (;; ptr++)
1098        class [c/8] |= (1 << (c&7));        class [c/8] |= (1 << (c&7));
1099        if ((options & PCRE_CASELESS) != 0)        if ((options & PCRE_CASELESS) != 0)
1100          {          {
1101          c = pcre_fcc[c];   /* flip case */          c = cd->fcc[c];   /* flip case */
1102          class[c/8] |= (1 << (c&7));          class[c/8] |= (1 << (c&7));
1103          }          }
1104        class_charcount++;        class_charcount++;
# Line 894  for (;; ptr++) Line 1145  for (;; ptr++)
1145      /* Various kinds of repeat */      /* Various kinds of repeat */
1146    
1147      case '{':      case '{':
1148      if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;      if (!is_counted_repeat(ptr+1, cd)) goto NORMAL_CHAR;
1149      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr);      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr, cd);
1150      if (*errorptr != NULL) goto FAILED;      if (*errorptr != NULL) goto FAILED;
1151      goto REPEAT;      goto REPEAT;
1152    
# Line 928  for (;; ptr++) Line 1179  for (;; ptr++)
1179        { repeat_type = greedy_non_default; ptr++; }        { repeat_type = greedy_non_default; ptr++; }
1180      else repeat_type = greedy_default;      else repeat_type = greedy_default;
1181    
     /* If the maximum is zero then the minimum must also be zero; Perl allows  
     this case, so we do too - by simply omitting the item altogether. */  
   
     if (repeat_max == 0) code = previous;  
   
1182      /* If previous was a string of characters, chop off the last one and use it      /* If previous was a string of characters, chop off the last one and use it
1183      as the subject of the repeat. If there was only one character, we can      as the subject of the repeat. If there was only one character, we can
1184      abolish the previous item altogether. */      abolish the previous item altogether. A repeat with a zero minimum wipes
1185        out any reqchar setting, backing up to the previous value. We must also
1186        adjust the countlits value. */
1187    
1188      else if (*previous == OP_CHARS)      if (*previous == OP_CHARS)
1189        {        {
1190        int len = previous[1];        int len = previous[1];
1191    
1192          if (repeat_min == 0) *reqchar = prevreqchar;
1193          *countlits += repeat_min - 1;
1194    
1195        if (len == 1)        if (len == 1)
1196          {          {
1197          c = previous[2];          c = previous[2];
# Line 978  for (;; ptr++) Line 1230  for (;; ptr++)
1230        code = previous;        code = previous;
1231    
1232        OUTPUT_SINGLE_REPEAT:        OUTPUT_SINGLE_REPEAT:
1233        repeat_type += op_type;      /* Combine both values for many cases */  
1234          /* If the maximum is zero then the minimum must also be zero; Perl allows
1235          this case, so we do too - by simply omitting the item altogether. */
1236    
1237          if (repeat_max == 0) goto END_REPEAT;
1238    
1239          /* Combine the op_type with the repeat_type */
1240    
1241          repeat_type += op_type;
1242    
1243        /* A minimum of zero is handled either as the special case * or ?, or as        /* A minimum of zero is handled either as the special case * or ?, or as
1244        an UPTO, with the maximum given. */        an UPTO, with the maximum given. */
# Line 1055  for (;; ptr++) Line 1315  for (;; ptr++)
1315        }        }
1316    
1317      /* If previous was a character class or a back reference, we put the repeat      /* If previous was a character class or a back reference, we put the repeat
1318      stuff after it. */      stuff after it, but just skip the item if the repeat was {0,0}. */
1319    
1320      else if (*previous == OP_CLASS || *previous == OP_REF)      else if (*previous == OP_CLASS || *previous == OP_REF)
1321        {        {
1322          if (repeat_max == 0)
1323            {
1324            code = previous;
1325            goto END_REPEAT;
1326            }
1327        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
1328          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
1329        else if (repeat_min == 1 && repeat_max == -1)        else if (repeat_min == 1 && repeat_max == -1)
# Line 1082  for (;; ptr++) Line 1347  for (;; ptr++)
1347      else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE ||      else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE ||
1348               (int)*previous == OP_COND)               (int)*previous == OP_COND)
1349        {        {
1350        int i, ketoffset = 0;        register int i;
1351          int ketoffset = 0;
1352        int len = code - previous;        int len = code - previous;
1353          uschar *bralink = NULL;
1354    
1355        /* If the maximum repeat count is unlimited, find the end of the bracket        /* If the maximum repeat count is unlimited, find the end of the bracket
1356        by scanning through from the start, and compute the offset back to it        by scanning through from the start, and compute the offset back to it
# Line 1098  for (;; ptr++) Line 1365  for (;; ptr++)
1365          ketoffset = code - ket;          ketoffset = code - ket;
1366          }          }
1367    
1368        /* If the minimum is greater than zero, and the maximum is unlimited or        /* The case of a zero minimum is special because of the need to stick
1369        equal to the minimum, the first copy remains where it is, and is        OP_BRAZERO in front of it, and because the group appears once in the
1370        replicated up to the minimum number of times. This case includes the +        data, whereas in other cases it appears the minimum number of times. For
1371        repeat, but of course no replication is needed in that case. */        this reason, it is simplest to treat this case separately, as otherwise
1372          the code gets far too messy. There are several special subcases when the
1373          minimum is zero. */
1374    
1375        if (repeat_min > 0 && (repeat_max == -1 || repeat_max == repeat_min))        if (repeat_min == 0)
1376          {          {
1377          for (i = 1; i < repeat_min; i++)          /* If we set up a required char from the bracket, we must back off
1378            to the previous value and reset the countlits value too. */
1379    
1380            if (subcountlits > 0)
1381            {            {
1382            memcpy(code, previous, len);            *reqchar = prevreqchar;
1383            code += len;            *countlits -= subcountlits;
1384            }            }
         }  
1385    
1386        /* If the minimum is zero, stick BRAZERO in front of the first copy.          /* If the maximum is also zero, we just omit the group from the output
1387        Then, if there is a fixed upper limit, replicated up to that many times,          altogether. */
       sticking BRAZERO in front of all the optional ones. */  
1388    
1389        else          if (repeat_max == 0)
1390          {            {
1391          if (repeat_min == 0)            code = previous;
1392              goto END_REPEAT;
1393              }
1394    
1395            /* If the maximum is 1 or unlimited, we just have to stick in the
1396            BRAZERO and do no more at this point. */
1397    
1398            if (repeat_max <= 1)
1399            {            {
1400            memmove(previous+1, previous, len);            memmove(previous+1, previous, len);
1401            code++;            code++;
1402            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
1403            }            }
1404    
1405            /* If the maximum is greater than 1 and limited, we have to replicate
1406            in a nested fashion, sticking OP_BRAZERO before each set of brackets.
1407            The first one has to be handled carefully because it's the original
1408            copy, which has to be moved up. The remainder can be handled by code
1409            that is common with the non-zero minimum case below. We just have to
1410            adjust the value of repeat_max, since one less copy is required. */
1411    
1412            else
1413              {
1414              int offset;
1415              memmove(previous+4, previous, len);
1416              code += 4;
1417              *previous++ = OP_BRAZERO + repeat_type;
1418              *previous++ = OP_BRA;
1419    
1420              /* We chain together the bracket offset fields that have to be
1421              filled in later when the ends of the brackets are reached. */
1422    
1423              offset = (bralink == NULL)? 0 : previous - bralink;
1424              bralink = previous;
1425              *previous++ = offset >> 8;
1426              *previous++ = offset & 255;
1427              }
1428    
1429            repeat_max--;
1430            }
1431    
1432          /* If the minimum is greater than zero, replicate the group as many
1433          times as necessary, and adjust the maximum to the number of subsequent
1434          copies that we need. */
1435    
1436          else
1437            {
1438          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1439            {            {
1440            memcpy(code, previous, len);            memcpy(code, previous, len);
1441            code += len;            code += len;
1442            }            }
1443            if (repeat_max > 0) repeat_max -= repeat_min;
1444            }
1445    
1446          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)        /* This code is common to both the zero and non-zero minimum cases. If
1447          the maximum is limited, it replicates the group in a nested fashion,
1448          remembering the bracket starts on a stack. In the case of a zero minimum,
1449          the first one was set up above. In all cases the repeat_max now specifies
1450          the number of additional copies needed. */
1451    
1452          if (repeat_max >= 0)
1453            {
1454            for (i = repeat_max - 1; i >= 0; i--)
1455            {            {
1456            *code++ = OP_BRAZERO + repeat_type;            *code++ = OP_BRAZERO + repeat_type;
1457    
1458              /* All but the final copy start a new nesting, maintaining the
1459              chain of brackets outstanding. */
1460    
1461              if (i != 0)
1462                {
1463                int offset;
1464                *code++ = OP_BRA;
1465                offset = (bralink == NULL)? 0 : code - bralink;
1466                bralink = code;
1467                *code++ = offset >> 8;
1468                *code++ = offset & 255;
1469                }
1470    
1471            memcpy(code, previous, len);            memcpy(code, previous, len);
1472            code += len;            code += len;
1473            }            }
1474    
1475            /* Now chain through the pending brackets, and fill in their length
1476            fields (which are holding the chain links pro tem). */
1477    
1478            while (bralink != NULL)
1479              {
1480              int oldlinkoffset;
1481              int offset = code - bralink + 1;
1482              uschar *bra = code - offset;
1483              oldlinkoffset = (bra[1] << 8) + bra[2];
1484              bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
1485              *code++ = OP_KET;
1486              *code++ = bra[1] = offset >> 8;
1487              *code++ = bra[2] = (offset & 255);
1488              }
1489          }          }
1490    
1491        /* If the maximum is unlimited, set a repeater in the final copy. We        /* If the maximum is unlimited, set a repeater in the final copy. We
# Line 1144  for (;; ptr++) Line 1493  for (;; ptr++)
1493        don't know if there's been an options resetting after the ket. The        don't know if there's been an options resetting after the ket. The
1494        correct offset was computed above. */        correct offset was computed above. */
1495    
1496        if (repeat_max == -1) code[-ketoffset] = OP_KETRMAX + repeat_type;        else code[-ketoffset] = OP_KETRMAX + repeat_type;
1497        }        }
1498    
1499      /* Else there's some kind of shambles */      /* Else there's some kind of shambles */
# Line 1157  for (;; ptr++) Line 1506  for (;; ptr++)
1506    
1507      /* In all cases we no longer have a previous item. */      /* In all cases we no longer have a previous item. */
1508    
1509        END_REPEAT:
1510      previous = NULL;      previous = NULL;
1511      break;      break;
1512    
# Line 1191  for (;; ptr++) Line 1541  for (;; ptr++)
1541    
1542          case '(':          case '(':
1543          bravalue = OP_COND;       /* Conditional group */          bravalue = OP_COND;       /* Conditional group */
1544          if ((pcre_ctypes[*(++ptr)] & ctype_digit) != 0)          if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0)
1545            {            {
1546            condref = *ptr - '0';            condref = *ptr - '0';
1547            while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';            while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';
# Line 1234  for (;; ptr++) Line 1584  for (;; ptr++)
1584          ptr++;          ptr++;
1585          break;          break;
1586    
1587            case 'R':                 /* Pattern recursion */
1588            *code++ = OP_RECURSE;
1589            ptr++;
1590            continue;
1591    
1592          default:                  /* Option setting */          default:                  /* Option setting */
1593          set = unset = 0;          set = unset = 0;
1594          optset = &set;          optset = &set;
# Line 1324  for (;; ptr++) Line 1679  for (;; ptr++)
1679           errorptr,                     /* Where to put an error message */           errorptr,                     /* Where to put an error message */
1680           (bravalue == OP_ASSERTBACK ||           (bravalue == OP_ASSERTBACK ||
1681            bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */            bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
1682           condref))                     /* Condition reference number */           condref,                      /* Condition reference number */
1683             &subreqchar,                  /* For possible last char */
1684             &subcountlits,                /* For literal count */
1685             cd))                          /* Tables block */
1686        goto FAILED;        goto FAILED;
1687    
1688      /* At the end of compiling, code is still pointing to the start of the      /* At the end of compiling, code is still pointing to the start of the
# Line 1337  for (;; ptr++) Line 1695  for (;; ptr++)
1695    
1696      if (bravalue == OP_COND)      if (bravalue == OP_COND)
1697        {        {
       int branchcount = 0;  
1698        uschar *tc = code;        uschar *tc = code;
1699          condcount = 0;
1700    
1701        do {        do {
1702           branchcount++;           condcount++;
1703           tc += (tc[1] << 8) | tc[2];           tc += (tc[1] << 8) | tc[2];
1704           }           }
1705        while (*tc != OP_KET);        while (*tc != OP_KET);
1706    
1707        if (branchcount > 2)        if (condcount > 2)
1708          {          {
1709          *errorptr = ERR27;          *errorptr = ERR27;
1710          goto FAILED;          goto FAILED;
1711          }          }
1712        }        }
1713    
1714        /* Handle updating of the required character. If the subpattern didn't
1715        set one, leave it as it was. Otherwise, update it for normal brackets of
1716        all kinds, forward assertions, and conditions with two branches. Don't
1717        update the literal count for forward assertions, however. If the bracket
1718        is followed by a quantifier with zero repeat, we have to back off. Hence
1719        the definition of prevreqchar and subcountlits outside the main loop so
1720        that they can be accessed for the back off. */
1721    
1722        if (subreqchar > 0 &&
1723             (bravalue >= OP_BRA || bravalue == OP_ONCE || bravalue == OP_ASSERT ||
1724             (bravalue == OP_COND && condcount == 2)))
1725          {
1726          prevreqchar = *reqchar;
1727          *reqchar = subreqchar;
1728          if (bravalue != OP_ASSERT) *countlits += subcountlits;
1729          }
1730    
1731      /* Now update the main code pointer to the end of the group. */      /* Now update the main code pointer to the end of the group. */
1732    
1733      code = tempcode;      code = tempcode;
# Line 1372  for (;; ptr++) Line 1747  for (;; ptr++)
1747    
1748      case '\\':      case '\\':
1749      tempptr = ptr;      tempptr = ptr;
1750      c = check_escape(&ptr, errorptr, *brackets, options, FALSE);      c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1751    
1752      /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values      /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values
1753      are arranged to be the negation of the corresponding OP_values. For the      are arranged to be the negation of the corresponding OP_values. For the
# Line 1417  for (;; ptr++) Line 1792  for (;; ptr++)
1792        {        {
1793        if ((options & PCRE_EXTENDED) != 0)        if ((options & PCRE_EXTENDED) != 0)
1794          {          {
1795          if ((pcre_ctypes[c] & ctype_space) != 0) continue;          if ((cd->ctypes[c] & ctype_space) != 0) continue;
1796          if (c == '#')          if (c == '#')
1797            {            {
1798            while ((c = *(++ptr)) != 0 && c != '\n');            while ((c = *(++ptr)) != 0 && c != '\n');
# Line 1433  for (;; ptr++) Line 1808  for (;; ptr++)
1808        if (c == '\\')        if (c == '\\')
1809          {          {
1810          tempptr = ptr;          tempptr = ptr;
1811          c = check_escape(&ptr, errorptr, *brackets, options, FALSE);          c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1812          if (c < 0) { ptr = tempptr; break; }          if (c < 0) { ptr = tempptr; break; }
1813          }          }
1814    
# Line 1445  for (;; ptr++) Line 1820  for (;; ptr++)
1820    
1821      /* This "while" is the end of the "do" above. */      /* This "while" is the end of the "do" above. */
1822    
1823      while (length < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);      while (length < 255 && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0);
1824    
1825        /* Update the last character and the count of literals */
1826    
1827        prevreqchar = (length > 1)? code[-2] : *reqchar;
1828        *reqchar = code[-1];
1829        *countlits += length;
1830    
1831      /* Compute the length and set it in the data vector, and advance to      /* Compute the length and set it in the data vector, and advance to
1832      the next state. */      the next state. */
# Line 1490  Argument: Line 1871  Argument:
1871    errorptr    -> pointer to error message    errorptr    -> pointer to error message
1872    lookbehind  TRUE if this is a lookbehind assertion    lookbehind  TRUE if this is a lookbehind assertion
1873    condref     > 0 for OPT_CREF setting at start of conditional group    condref     > 0 for OPT_CREF setting at start of conditional group
1874      reqchar     -> place to put the last required character, or a negative number
1875      countlits   -> place to put the shortest literal count of any branch
1876      cd          points to the data block with tables pointers
1877    
1878  Returns:      TRUE on success  Returns:      TRUE on success
1879  */  */
1880    
1881  static BOOL  static BOOL
1882  compile_regex(int options, int optchanged, int *brackets, uschar **codeptr,  compile_regex(int options, int optchanged, int *brackets, uschar **codeptr,
1883    const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref)    const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref,
1884      int *reqchar, int *countlits, compile_data *cd)
1885  {  {
1886  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
1887  uschar *code = *codeptr;  uschar *code = *codeptr;
# Line 1504  uschar *last_branch = code; Line 1889  uschar *last_branch = code;
1889  uschar *start_bracket = code;  uschar *start_bracket = code;
1890  uschar *reverse_count = NULL;  uschar *reverse_count = NULL;
1891  int oldoptions = options & PCRE_IMS;  int oldoptions = options & PCRE_IMS;
1892    int branchreqchar, branchcountlits;
1893    
1894    *reqchar = -1;
1895    *countlits = INT_MAX;
1896  code += 3;  code += 3;
1897    
1898  /* At the start of a reference-based conditional group, insert the reference  /* At the start of a reference-based conditional group, insert the reference
# Line 1543  for (;;) Line 1931  for (;;)
1931    
1932    /* Now compile the branch */    /* Now compile the branch */
1933    
1934    if (!compile_branch(options, brackets, &code, &ptr, errorptr, &optchanged))    if (!compile_branch(options, brackets, &code, &ptr, errorptr, &optchanged,
1935          &branchreqchar, &branchcountlits, cd))
1936      {      {
1937      *ptrptr = ptr;      *ptrptr = ptr;
1938      return FALSE;      return FALSE;
# Line 1555  for (;;) Line 1944  for (;;)
1944    last_branch[1] = length >> 8;    last_branch[1] = length >> 8;
1945    last_branch[2] = length & 255;    last_branch[2] = length & 255;
1946    
1947      /* Save the last required character if all branches have the same; a current
1948      value of -1 means unset, while -2 means "previous branch had no last required
1949      char".  */
1950    
1951      if (*reqchar != -2)
1952        {
1953        if (branchreqchar >= 0)
1954          {
1955          if (*reqchar == -1) *reqchar = branchreqchar;
1956          else if (*reqchar != branchreqchar) *reqchar = -2;
1957          }
1958        else *reqchar = -2;
1959        }
1960    
1961      /* Keep the shortest literal count */
1962    
1963      if (branchcountlits < *countlits) *countlits = branchcountlits;
1964      DPRINTF(("literal count = %d min=%d\n", branchcountlits, *countlits));
1965    
1966    /* If lookbehind, check that this branch matches a fixed-length string,    /* If lookbehind, check that this branch matches a fixed-length string,
1967    and put the length into the OP_REVERSE item. Temporarily mark the end of    and put the length into the OP_REVERSE item. Temporarily mark the end of
1968    the branch with OP_END. */    the branch with OP_END. */
# Line 1649  for (;;) Line 2057  for (;;)
2057      code += 2;      code += 2;
2058      break;      break;
2059    
2060        case OP_WORD_BOUNDARY:
2061        case OP_NOT_WORD_BOUNDARY:
2062        code++;
2063        break;
2064    
2065      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
2066      case OP_ASSERTBACK:      case OP_ASSERTBACK:
2067      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
# Line 1676  all of whose alternatives start with OP_ Line 2089  all of whose alternatives start with OP_
2089  it's anchored. However, if this is a multiline pattern, then only OP_SOD  it's anchored. However, if this is a multiline pattern, then only OP_SOD
2090  counts, since OP_CIRC can match in the middle.  counts, since OP_CIRC can match in the middle.
2091    
2092  A branch is also implicitly anchored if it starts with .* because that will try  A branch is also implicitly anchored if it starts with .* and DOTALL is set,
2093  the rest of the pattern at all possible matching points, so there is no point  because that will try the rest of the pattern at all possible matching points,
2094  trying them again.  so there is no point trying them again.
2095    
2096  Arguments:  Arguments:
2097    code       points to start of expression (the bracket)    code       points to start of expression (the bracket)
# Line 1696  do { Line 2109  do {
2109     register int op = *scode;     register int op = *scode;
2110     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
2111       { if (!is_anchored(scode, options)) return FALSE; }       { if (!is_anchored(scode, options)) return FALSE; }
2112     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)     else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) &&
2113                (*options & PCRE_DOTALL) != 0)
2114       { if (scode[1] != OP_ANY) return FALSE; }       { if (scode[1] != OP_ANY) return FALSE; }
2115     else if (op != OP_SOD &&     else if (op != OP_SOD &&
2116             ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))             ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))
# Line 1710  return TRUE; Line 2124  return TRUE;
2124    
2125    
2126  /*************************************************  /*************************************************
2127  *     Check for start with \n line expression    *  *         Check for starting with ^ or .*        *
2128  *************************************************/  *************************************************/
2129    
2130  /* This is called for multiline expressions to try to find out if every branch  /* This is called to find out if every branch starts with ^ or .* so that
2131  starts with ^ so that "first char" processing can be done to speed things up.  "first char" processing can be done to speed things up in multiline
2132    matching and for non-DOTALL patterns that start with .* (which must start at
2133    the beginning or after \n).
2134    
2135  Argument:  points to start of expression (the bracket)  Argument:  points to start of expression (the bracket)
2136  Returns:   TRUE or FALSE  Returns:   TRUE or FALSE
# Line 1728  do { Line 2144  do {
2144     register int op = *scode;     register int op = *scode;
2145     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
2146       { if (!is_startline(scode)) return FALSE; }       { if (!is_startline(scode)) return FALSE; }
2147       else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)
2148         { if (scode[1] != OP_ANY) return FALSE; }
2149     else if (op != OP_CIRC) return FALSE;     else if (op != OP_CIRC) return FALSE;
2150     code += (code[1] << 8) + code[2];     code += (code[1] << 8) + code[2];
2151     }     }
# Line 1813  Arguments: Line 2231  Arguments:
2231    options      various option bits    options      various option bits
2232    errorptr     pointer to pointer to error text    errorptr     pointer to pointer to error text
2233    erroroffset  ptr offset in pattern where error was detected    erroroffset  ptr offset in pattern where error was detected
2234      tables       pointer to character tables or NULL
2235    
2236  Returns:       pointer to compiled data block, or NULL on error,  Returns:       pointer to compiled data block, or NULL on error,
2237                 with errorptr and erroroffset set                 with errorptr and erroroffset set
# Line 1820  Returns:       pointer to compiled data Line 2239  Returns:       pointer to compiled data
2239    
2240  pcre *  pcre *
2241  pcre_compile(const char *pattern, int options, const char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
2242    int *erroroffset)    int *erroroffset, const unsigned char *tables)
2243  {  {
2244  real_pcre *re;  real_pcre *re;
2245  int length = 3;      /* For initial BRA plus length */  int length = 3;      /* For initial BRA plus length */
2246  int runlength;  int runlength;
2247  int c, size;  int c, reqchar, countlits;
2248  int bracount = 0;  int bracount = 0;
2249  int top_backref = 0;  int top_backref = 0;
2250  int branch_extra = 0;  int branch_extra = 0;
2251  int branch_newextra;  int branch_newextra;
2252  unsigned int brastackptr = 0;  unsigned int brastackptr = 0;
2253    size_t size;
2254  uschar *code;  uschar *code;
2255  const uschar *ptr;  const uschar *ptr;
2256    compile_data compile_block;
2257  int brastack[BRASTACK_SIZE];  int brastack[BRASTACK_SIZE];
2258  uschar bralenstack[BRASTACK_SIZE];  uschar bralenstack[BRASTACK_SIZE];
2259    
# Line 1861  if ((options & ~PUBLIC_OPTIONS) != 0) Line 2282  if ((options & ~PUBLIC_OPTIONS) != 0)
2282    return NULL;    return NULL;
2283    }    }
2284    
2285    /* Set up pointers to the individual character tables */
2286    
2287    if (tables == NULL) tables = pcre_default_tables;
2288    compile_block.lcc = tables + lcc_offset;
2289    compile_block.fcc = tables + fcc_offset;
2290    compile_block.cbits = tables + cbits_offset;
2291    compile_block.ctypes = tables + ctypes_offset;
2292    
2293    /* Reflect pattern for debugging output */
2294    
2295  DPRINTF(("------------------------------------------------------------------\n"));  DPRINTF(("------------------------------------------------------------------\n"));
2296  DPRINTF(("%s\n", pattern));  DPRINTF(("%s\n", pattern));
2297    
# Line 1879  while ((c = *(++ptr)) != 0) Line 2310  while ((c = *(++ptr)) != 0)
2310    
2311    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
2312      {      {
2313      if ((pcre_ctypes[c] & ctype_space) != 0) continue;      if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2314      if (c == '#')      if (c == '#')
2315        {        {
2316        while ((c = *(++ptr)) != 0 && c != '\n');        while ((c = *(++ptr)) != 0 && c != '\n');
# Line 1897  while ((c = *(++ptr)) != 0) Line 2328  while ((c = *(++ptr)) != 0)
2328      case '\\':      case '\\':
2329        {        {
2330        const uschar *save_ptr = ptr;        const uschar *save_ptr = ptr;
2331        c = check_escape(&ptr, errorptr, bracount, options, FALSE);        c = check_escape(&ptr, errorptr, bracount, options, FALSE, &compile_block);
2332        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2333        if (c >= 0)        if (c >= 0)
2334          {          {
# Line 1917  while ((c = *(++ptr)) != 0) Line 2348  while ((c = *(++ptr)) != 0)
2348        int refnum = -c - ESC_REF;        int refnum = -c - ESC_REF;
2349        if (refnum > top_backref) top_backref = refnum;        if (refnum > top_backref) top_backref = refnum;
2350        length++;   /* For single back reference */        length++;   /* For single back reference */
2351        if (ptr[1] == '{' && is_counted_repeat(ptr+2))        if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2352          {          {
2353          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
2354          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2355          if ((min == 0 && (max == 1 || max == -1)) ||          if ((min == 0 && (max == 1 || max == -1)) ||
2356            (min == 1 && max == -1))            (min == 1 && max == -1))
# Line 1943  while ((c = *(++ptr)) != 0) Line 2374  while ((c = *(++ptr)) != 0)
2374      or back reference. */      or back reference. */
2375    
2376      case '{':      case '{':
2377      if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;      if (!is_counted_repeat(ptr+1, &compile_block)) goto NORMAL_CHAR;
2378      ptr = read_repeat_counts(ptr+1, &min, &max, errorptr);      ptr = read_repeat_counts(ptr+1, &min, &max, errorptr, &compile_block);
2379      if (*errorptr != NULL) goto PCRE_ERROR_RETURN;      if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2380      if ((min == 0 && (max == 1 || max == -1)) ||      if ((min == 0 && (max == 1 || max == -1)) ||
2381        (min == 1 && max == -1))        (min == 1 && max == -1))
# Line 1979  while ((c = *(++ptr)) != 0) Line 2410  while ((c = *(++ptr)) != 0)
2410        {        {
2411        if (*ptr == '\\')        if (*ptr == '\\')
2412          {          {
2413          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE);          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE,
2414              &compile_block);
2415          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2416          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
2417          }          }
# Line 1996  while ((c = *(++ptr)) != 0) Line 2428  while ((c = *(++ptr)) != 0)
2428    
2429        /* A repeat needs either 1 or 5 bytes. */        /* A repeat needs either 1 or 5 bytes. */
2430    
2431        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2432          {          {
2433          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
2434          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2435          if ((min == 0 && (max == 1 || max == -1)) ||          if ((min == 0 && (max == 1 || max == -1)) ||
2436            (min == 1 && max == -1))            (min == 1 && max == -1))
# Line 2046  while ((c = *(++ptr)) != 0) Line 2478  while ((c = *(++ptr)) != 0)
2478          ptr += 2;          ptr += 2;
2479          break;          break;
2480    
2481            /* A recursive call to the regex is an extension, to provide the
2482            facility which can be obtained by $(?p{perl-code}) in Perl 5.6. */
2483    
2484            case 'R':
2485            if (ptr[3] != ')')
2486              {
2487              *errorptr = ERR29;
2488              goto PCRE_ERROR_RETURN;
2489              }
2490            ptr += 3;
2491            length += 1;
2492            break;
2493    
2494          /* Lookbehinds are in Perl from version 5.005 */          /* Lookbehinds are in Perl from version 5.005 */
2495    
2496          case '<':          case '<':
# Line 2064  while ((c = *(++ptr)) != 0) Line 2509  while ((c = *(++ptr)) != 0)
2509          group. */          group. */
2510    
2511          case '(':          case '(':
2512          if ((pcre_ctypes[ptr[3]] & ctype_digit) != 0)          if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0)
2513            {            {
2514            ptr += 4;            ptr += 4;
2515            length += 2;            length += 2;
2516            while ((pcre_ctypes[*ptr] & ctype_digit) != 0) ptr++;            while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++;
2517            if (*ptr != ')')            if (*ptr != ')')
2518              {              {
2519              *errorptr = ERR26;              *errorptr = ERR26;
# Line 2153  while ((c = *(++ptr)) != 0) Line 2598  while ((c = *(++ptr)) != 0)
2598              will lead to an over-estimate on the length, but this shouldn't              will lead to an over-estimate on the length, but this shouldn't
2599              matter very much. We also have to allow for resetting options at              matter very much. We also have to allow for resetting options at
2600              the start of any alternations, which we do by setting              the start of any alternations, which we do by setting
2601              branch_newextra to 2. */              branch_newextra to 2. Finally, we record whether the case-dependent
2602                flag ever changes within the regex. This is used by the "required
2603                character" code. */
2604    
2605              case ':':              case ':':
2606              if (((set|unset) & PCRE_IMS) != 0)              if (((set|unset) & PCRE_IMS) != 0)
2607                {                {
2608                length += 4;                length += 4;
2609                branch_newextra = 2;                branch_newextra = 2;
2610                  if (((set|unset) & PCRE_CASELESS) != 0) options |= PCRE_ICHANGED;
2611                }                }
2612              goto END_OPTIONS;              goto END_OPTIONS;
2613    
# Line 2237  while ((c = *(++ptr)) != 0) Line 2685  while ((c = *(++ptr)) != 0)
2685        /* Leave ptr at the final char; for read_repeat_counts this happens        /* Leave ptr at the final char; for read_repeat_counts this happens
2686        automatically; for the others we need an increment. */        automatically; for the others we need an increment. */
2687    
2688        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2, &compile_block))
2689          {          {
2690          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr);          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr,
2691              &compile_block);
2692          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2693          }          }
2694        else if (c == '*') { minval = 0; maxval = -1; ptr++; }        else if (c == '*') { minval = 0; maxval = -1; ptr++; }
2695        else if (c == '+') { maxval = -1; ptr++; }        else if (c == '+') { maxval = -1; ptr++; }
2696        else if (c == '?') { minval = 0; ptr++; }        else if (c == '?') { minval = 0; ptr++; }
2697    
2698        /* If there is a minimum > 1 we have to replicate up to minval-1 times;        /* If the minimum is zero, we have to allow for an OP_BRAZERO before the
2699        if there is a limited maximum we have to replicate up to maxval-1 times        group, and if the maximum is greater than zero, we have to replicate
2700        and allow for a BRAZERO item before each optional copy, as we also have        maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting
2701        to do before the first copy if the minimum is zero. */        bracket set - hence the 7. */
2702    
2703        if (minval == 0) length++;        if (minval == 0)
2704          else if (minval > 1) length += (minval - 1) * duplength;          {
2705        if (maxval > minval) length += (maxval - minval) * (duplength + 1);          length++;
2706            if (maxval > 0) length += (maxval - 1) * (duplength + 7);
2707            }
2708    
2709          /* When the minimum is greater than zero, we have to replicate up to
2710          minval-1 times, with no additions required in the copies. Then, if
2711          there is a limited maximum we have to replicate up to maxval-1 times
2712          allowing for a BRAZERO item before each optional copy and nesting
2713          brackets for all but one of the optional copies. */
2714    
2715          else
2716            {
2717            length += (minval - 1) * duplength;
2718            if (maxval > minval)   /* Need this test as maxval=-1 means no limit */
2719              length += (maxval - minval) * (duplength + 7) - 6;
2720            }
2721        }        }
2722      continue;      continue;
2723    
# Line 2270  while ((c = *(++ptr)) != 0) Line 2734  while ((c = *(++ptr)) != 0)
2734        {        {
2735        if ((options & PCRE_EXTENDED) != 0)        if ((options & PCRE_EXTENDED) != 0)
2736          {          {
2737          if ((pcre_ctypes[c] & ctype_space) != 0) continue;          if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2738          if (c == '#')          if (c == '#')
2739            {            {
2740            while ((c = *(++ptr)) != 0 && c != '\n');            while ((c = *(++ptr)) != 0 && c != '\n');
# Line 2284  while ((c = *(++ptr)) != 0) Line 2748  while ((c = *(++ptr)) != 0)
2748        if (c == '\\')        if (c == '\\')
2749          {          {
2750          const uschar *saveptr = ptr;          const uschar *saveptr = ptr;
2751          c = check_escape(&ptr, errorptr, bracount, options, FALSE);          c = check_escape(&ptr, errorptr, bracount, options, FALSE,
2752              &compile_block);
2753          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2754          if (c < 0) { ptr = saveptr; break; }          if (c < 0) { ptr = saveptr; break; }
2755          }          }
# Line 2296  while ((c = *(++ptr)) != 0) Line 2761  while ((c = *(++ptr)) != 0)
2761    
2762      /* This "while" is the end of the "do" above. */      /* This "while" is the end of the "do" above. */
2763    
2764      while (runlength < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);      while (runlength < 255 &&
2765          (compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0);
2766    
2767      ptr--;      ptr--;
2768      length += runlength;      length += runlength;
# Line 2327  if (re == NULL) Line 2793  if (re == NULL)
2793    return NULL;    return NULL;
2794    }    }
2795    
2796  /* Put in the magic number and the options. */  /* Put in the magic number, and save the size, options, and table pointer */
2797    
2798  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
2799    re->size = size;
2800  re->options = options;  re->options = options;
2801    re->tables = tables;
2802    
2803  /* Set up a starting, non-extracting bracket, then compile the expression. On  /* Set up a starting, non-extracting bracket, then compile the expression. On
2804  error, *errorptr will be set non-NULL, so we don't need to look at the result  error, *errorptr will be set non-NULL, so we don't need to look at the result
# Line 2340  ptr = (const uschar *)pattern; Line 2808  ptr = (const uschar *)pattern;
2808  code = re->code;  code = re->code;
2809  *code = OP_BRA;  *code = OP_BRA;
2810  bracount = 0;  bracount = 0;
2811  (void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1);  (void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1,
2812      &reqchar, &countlits, &compile_block);
2813  re->top_bracket = bracount;  re->top_bracket = bracount;
2814  re->top_backref = top_backref;  re->top_backref = top_backref;
2815    
# Line 2372  if (*errorptr != NULL) Line 2841  if (*errorptr != NULL)
2841    return NULL;    return NULL;
2842    }    }
2843    
2844  /* If the anchored option was not passed, set flag if we can determine that it  /* If the anchored option was not passed, set flag if we can determine that the
2845  is anchored by virtue of ^ characters or \A or anything else. Otherwise, see if  pattern is anchored by virtue of ^ characters or \A or anything else (such as
2846  we can determine what the first character has to be, because that speeds up  starting with .* when DOTALL is set).
2847  unanchored matches no end. In the case of multiline matches, an alternative is  
2848  to set the PCRE_STARTLINE flag if all branches start with ^. */  Otherwise, see if we can determine what the first character has to be, because
2849    that speeds up unanchored matches no end. If not, see if we can set the
2850    PCRE_STARTLINE flag. This is helpful for multiline matches when all branches
2851    start with ^, and also when all branches start with .* for non-DOTALL matches.
2852    */
2853    
2854  if ((options & PCRE_ANCHORED) == 0)  if ((options & PCRE_ANCHORED) == 0)
2855    {    {
# Line 2396  if ((options & PCRE_ANCHORED) == 0) Line 2869  if ((options & PCRE_ANCHORED) == 0)
2869      }      }
2870    }    }
2871    
2872    /* Save the last required character if there are at least two literal
2873    characters on all paths, or if there is no first character setting. */
2874    
2875    if (reqchar >= 0 && (countlits > 1 || (re->options & PCRE_FIRSTSET) == 0))
2876      {
2877      re->req_char = reqchar;
2878      re->options |= PCRE_REQCHSET;
2879      }
2880    
2881  /* Print out the compiled data for debugging */  /* Print out the compiled data for debugging */
2882    
2883  #ifdef DEBUG  #ifdef DEBUG
# Line 2405  printf("Length = %d top_bracket = %d top Line 2887  printf("Length = %d top_bracket = %d top
2887    
2888  if (re->options != 0)  if (re->options != 0)
2889    {    {
2890    printf("%s%s%s%s%s%s%s%s\n",    printf("%s%s%s%s%s%s%s%s%s\n",
2891      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
2892      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
2893        ((re->options & PCRE_ICHANGED) != 0)? "case state changed " : "",
2894      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
2895      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
2896      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
# Line 2422  if ((re->options & PCRE_FIRSTSET) != 0) Line 2905  if ((re->options & PCRE_FIRSTSET) != 0)
2905      else printf("First char = \\x%02x\n", re->first_char);      else printf("First char = \\x%02x\n", re->first_char);
2906    }    }
2907    
2908    if ((re->options & PCRE_REQCHSET) != 0)
2909      {
2910      if (isprint(re->req_char)) printf("Req char = %c\n", re->req_char);
2911        else printf("Req char = \\x%02x\n", re->req_char);
2912      }
2913    
2914  code_end = code;  code_end = code;
2915  code_base = code = re->code;  code_base = code = re->code;
2916    
# Line 2637  return (pcre *)re; Line 3126  return (pcre *)re;
3126    
3127    
3128  /*************************************************  /*************************************************
 *        Match a character type                  *  
 *************************************************/  
   
 /* Not used in all the places it might be as it's sometimes faster  
 to put the code inline.  
   
 Arguments:  
   type        the character type  
   c           the character  
   dotall      the dotall flag  
   
 Returns:      TRUE if character is of the type  
 */  
   
 static BOOL  
 match_type(int type, int c, BOOL dotall)  
 {  
   
 #ifdef DEBUG  
 if (isprint(c)) printf("matching subject %c against ", c);  
   else printf("matching subject \\x%02x against ", c);  
 printf("%s\n", OP_names[type]);  
 #endif  
   
 switch(type)  
   {  
   case OP_ANY:            return dotall || c != '\n';  
   case OP_NOT_DIGIT:      return (pcre_ctypes[c] & ctype_digit) == 0;  
   case OP_DIGIT:          return (pcre_ctypes[c] & ctype_digit) != 0;  
   case OP_NOT_WHITESPACE: return (pcre_ctypes[c] & ctype_space) == 0;  
   case OP_WHITESPACE:     return (pcre_ctypes[c] & ctype_space) != 0;  
   case OP_NOT_WORDCHAR:   return (pcre_ctypes[c] & ctype_word) == 0;  
   case OP_WORDCHAR:       return (pcre_ctypes[c] & ctype_word) != 0;  
   }  
 return FALSE;  
 }  
   
   
   
 /*************************************************  
3129  *          Match a back-reference                *  *          Match a back-reference                *
3130  *************************************************/  *************************************************/
3131    
# Line 2695  Returns:      TRUE if matched Line 3144  Returns:      TRUE if matched
3144    
3145  static BOOL  static BOOL
3146  match_ref(int offset, register const uschar *eptr, int length, match_data *md,  match_ref(int offset, register const uschar *eptr, int length, match_data *md,
3147    int ims)    unsigned long int ims)
3148  {  {
3149  const uschar *p = md->start_subject + md->offset_vector[offset];  const uschar *p = md->start_subject + md->offset_vector[offset];
3150    
# Line 2719  if (length > md->end_subject - eptr) ret Line 3168  if (length > md->end_subject - eptr) ret
3168  /* Separate the caseless case for speed */  /* Separate the caseless case for speed */
3169    
3170  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
3171    { while (length-- > 0) if (pcre_lcc[*p++] != pcre_lcc[*eptr++]) return FALSE; }    {
3172      while (length-- > 0)
3173        if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
3174      }
3175  else  else
3176    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
3177    
# Line 2751  Returns:       TRUE if matched Line 3203  Returns:       TRUE if matched
3203    
3204  static BOOL  static BOOL
3205  match(register const uschar *eptr, register const uschar *ecode,  match(register const uschar *eptr, register const uschar *ecode,
3206    int offset_top, match_data *md, int ims, BOOL condassert, const uschar *eptrb)    int offset_top, match_data *md, unsigned long int ims, BOOL condassert,
3207      const uschar *eptrb)
3208  {  {
3209  int original_ims = ims;   /* Save for resetting on ')' */  unsigned long int original_ims = ims;   /* Save for resetting on ')' */
3210    
3211  for (;;)  for (;;)
3212    {    {
# Line 2782  for (;;) Line 3235  for (;;)
3235      int number = op - OP_BRA;      int number = op - OP_BRA;
3236      int offset = number << 1;      int offset = number << 1;
3237    
3238      DPRINTF(("start bracket %d\n", number));  #ifdef DEBUG
3239        printf("start bracket %d subject=", number);
3240        pchars(eptr, 16, TRUE, md);
3241        printf("\n");
3242    #endif
3243    
3244      if (offset < md->offset_max)      if (offset < md->offset_max)
3245        {        {
# Line 2864  for (;;) Line 3321  for (;;)
3321      ecode += 2;      ecode += 2;
3322      break;      break;
3323    
3324      /* End of the pattern */      /* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched
3325        an empty string - recursion will then try other alternatives, if any. */
3326    
3327      case OP_END:      case OP_END:
3328        if (md->notempty && eptr == md->start_match) return FALSE;
3329      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;          /* Record where we ended */
3330      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;   /* and how many extracts were taken */
3331      return TRUE;      return TRUE;
# Line 2876  for (;;) Line 3335  for (;;)
3335      case OP_OPT:      case OP_OPT:
3336      ims = ecode[1];      ims = ecode[1];
3337      ecode += 2;      ecode += 2;
3338      DPRINTF(("ims set to %02x\n", ims));      DPRINTF(("ims set to %02lx\n", ims));
3339      break;      break;
3340    
3341      /* Assertion brackets. Check the alternative branches in turn - the      /* Assertion brackets. Check the alternative branches in turn - the
# Line 2932  for (;;) Line 3391  for (;;)
3391      ecode += 3;      ecode += 3;
3392      break;      break;
3393    
3394        /* Recursion matches the current regex, nested. If there are any capturing
3395        brackets started but not finished, we have to save their starting points
3396        and reinstate them after the recursion. However, we don't know how many
3397        such there are (offset_top records the completed total) so we just have
3398        to save all the potential data. There may be up to 99 such values, which
3399        is a bit large to put on the stack, but using malloc for small numbers
3400        seems expensive. As a compromise, the stack is used when there are fewer
3401        than 16 values to store; otherwise malloc is used. A problem is what to do
3402        if the malloc fails ... there is no way of returning to the top level with
3403        an error. Save the top 15 values on the stack, and accept that the rest
3404        may be wrong. */
3405    
3406        case OP_RECURSE:
3407          {
3408          BOOL rc;
3409          int *save;
3410          int stacksave[15];
3411    
3412          c = md->offset_max;
3413    
3414          if (c < 16) save = stacksave; else
3415            {
3416            save = (int *)(pcre_malloc)((c+1) * sizeof(int));
3417            if (save == NULL)
3418              {
3419              save = stacksave;
3420              c = 15;
3421              }
3422            }
3423    
3424          for (i = 1; i <= c; i++)
3425            save[i] = md->offset_vector[md->offset_end - i];
3426          rc = match(eptr, md->start_pattern, offset_top, md, ims, FALSE, eptrb);
3427          for (i = 1; i <= c; i++)
3428            md->offset_vector[md->offset_end - i] = save[i];
3429          if (save != stacksave) (pcre_free)(save);
3430          if (!rc) return FALSE;
3431    
3432          /* In case the recursion has set more capturing values, save the final
3433          number, then move along the subject till after the recursive match,
3434          and advance one byte in the pattern code. */
3435    
3436          offset_top = md->end_offset_top;
3437          eptr = md->end_match_ptr;
3438          ecode++;
3439          }
3440        break;
3441    
3442      /* "Once" brackets are like assertion brackets except that after a match,      /* "Once" brackets are like assertion brackets except that after a match,
3443      the point in the subject string is not moved back. Thus there can never be      the point in the subject string is not moved back. Thus there can never be
# Line 2983  for (;;) Line 3489  for (;;)
3489        if (ecode[3] == OP_OPT)        if (ecode[3] == OP_OPT)
3490          {          {
3491          ims = (ims & ~PCRE_IMS) | ecode[4];          ims = (ims & ~PCRE_IMS) | ecode[4];
3492          DPRINTF(("ims set to %02x at group repeat\n", ims));          DPRINTF(("ims set to %02lx at group repeat\n", ims));
3493          }          }
3494    
3495        if (*ecode == OP_KETRMIN)        if (*ecode == OP_KETRMIN)
# Line 3077  for (;;) Line 3583  for (;;)
3583        the group. */        the group. */
3584    
3585        ims = original_ims;        ims = original_ims;
3586        DPRINTF(("ims reset to %02x\n", ims));        DPRINTF(("ims reset to %02lx\n", ims));
3587    
3588        /* For a non-repeating ket, just continue at this level. This also        /* For a non-repeating ket, just continue at this level. This also
3589        happens for a repeating ket if no characters were matched in the group.        happens for a repeating ket if no characters were matched in the group.
# Line 3172  for (;;) Line 3678  for (;;)
3678      case OP_WORD_BOUNDARY:      case OP_WORD_BOUNDARY:
3679        {        {
3680        BOOL prev_is_word = (eptr != md->start_subject) &&        BOOL prev_is_word = (eptr != md->start_subject) &&
3681          ((pcre_ctypes[eptr[-1]] & ctype_word) != 0);          ((md->ctypes[eptr[-1]] & ctype_word) != 0);
3682        BOOL cur_is_word = (eptr < md->end_subject) &&        BOOL cur_is_word = (eptr < md->end_subject) &&
3683          ((pcre_ctypes[*eptr] & ctype_word) != 0);          ((md->ctypes[*eptr] & ctype_word) != 0);
3684        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
3685             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
3686          return FALSE;          return FALSE;
# Line 3191  for (;;) Line 3697  for (;;)
3697      break;      break;
3698    
3699      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
3700      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) != 0)      if (eptr >= md->end_subject ||
3701           (md->ctypes[*eptr++] & ctype_digit) != 0)
3702        return FALSE;        return FALSE;
3703      ecode++;      ecode++;
3704      break;      break;
3705    
3706      case OP_DIGIT:      case OP_DIGIT:
3707      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) == 0)      if (eptr >= md->end_subject ||
3708           (md->ctypes[*eptr++] & ctype_digit) == 0)
3709        return FALSE;        return FALSE;
3710      ecode++;      ecode++;
3711      break;      break;
3712    
3713      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
3714      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) != 0)      if (eptr >= md->end_subject ||
3715           (md->ctypes[*eptr++] & ctype_space) != 0)
3716        return FALSE;        return FALSE;
3717      ecode++;      ecode++;
3718      break;      break;
3719    
3720      case OP_WHITESPACE:      case OP_WHITESPACE:
3721      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) == 0)      if (eptr >= md->end_subject ||
3722           (md->ctypes[*eptr++] & ctype_space) == 0)
3723        return FALSE;        return FALSE;
3724      ecode++;      ecode++;
3725      break;      break;
3726    
3727      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
3728      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) != 0)      if (eptr >= md->end_subject ||
3729           (md->ctypes[*eptr++] & ctype_word) != 0)
3730        return FALSE;        return FALSE;
3731      ecode++;      ecode++;
3732      break;      break;
3733    
3734      case OP_WORDCHAR:      case OP_WORDCHAR:
3735      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) == 0)      if (eptr >= md->end_subject ||
3736           (md->ctypes[*eptr++] & ctype_word) == 0)
3737        return FALSE;        return FALSE;
3738      ecode++;      ecode++;
3739      break;      break;
# Line 3453  for (;;) Line 3965  for (;;)
3965        if (length > md->end_subject - eptr) return FALSE;        if (length > md->end_subject - eptr) return FALSE;
3966        if ((ims & PCRE_CASELESS) != 0)        if ((ims & PCRE_CASELESS) != 0)
3967          {          {
3968          while (length-- > 0) if (pcre_lcc[*ecode++] != pcre_lcc[*eptr++]) return FALSE;          while (length-- > 0)
3969              if (md->lcc[*ecode++] != md->lcc[*eptr++])
3970                return FALSE;
3971          }          }
3972        else        else
3973          {          {
# Line 3510  for (;;) Line 4024  for (;;)
4024    
4025      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
4026        {        {
4027        c = pcre_lcc[c];        c = md->lcc[c];
4028        for (i = 1; i <= min; i++) if (c != pcre_lcc[*eptr++]) return FALSE;        for (i = 1; i <= min; i++)
4029            if (c != md->lcc[*eptr++]) return FALSE;
4030        if (min == max) continue;        if (min == max) continue;
4031        if (minimize)        if (minimize)
4032          {          {
# Line 3519  for (;;) Line 4034  for (;;)
4034            {            {
4035            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
4036              return TRUE;              return TRUE;
4037            if (i >= max || eptr >= md->end_subject || c != pcre_lcc[*eptr++])            if (i >= max || eptr >= md->end_subject ||
4038                  c != md->lcc[*eptr++])
4039              return FALSE;              return FALSE;
4040            }            }
4041          /* Control never gets here */          /* Control never gets here */
# Line 3529  for (;;) Line 4045  for (;;)
4045          const uschar *pp = eptr;          const uschar *pp = eptr;
4046          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4047            {            {
4048            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c != md->lcc[*eptr]) break;
4049            eptr++;            eptr++;
4050            }            }
4051          while (eptr >= pp)          while (eptr >= pp)
# Line 3579  for (;;) Line 4095  for (;;)
4095      ecode++;      ecode++;
4096      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
4097        {        {
4098        if (pcre_lcc[*ecode++] == pcre_lcc[*eptr++]) return FALSE;        if (md->lcc[*ecode++] == md->lcc[*eptr++]) return FALSE;
4099        }        }
4100      else      else
4101        {        {
# Line 3639  for (;;) Line 4155  for (;;)
4155    
4156      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
4157        {        {
4158        c = pcre_lcc[c];        c = md->lcc[c];
4159        for (i = 1; i <= min; i++) if (c == pcre_lcc[*eptr++]) return FALSE;        for (i = 1; i <= min; i++)
4160            if (c == md->lcc[*eptr++]) return FALSE;
4161        if (min == max) continue;        if (min == max) continue;
4162        if (minimize)        if (minimize)
4163          {          {
# Line 3648  for (;;) Line 4165  for (;;)
4165            {            {
4166            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
4167              return TRUE;              return TRUE;
4168            if (i >= max || eptr >= md->end_subject || c == pcre_lcc[*eptr++])            if (i >= max || eptr >= md->end_subject ||
4169                  c == md->lcc[*eptr++])
4170              return FALSE;              return FALSE;
4171            }            }
4172          /* Control never gets here */          /* Control never gets here */
# Line 3658  for (;;) Line 4176  for (;;)
4176          const uschar *pp = eptr;          const uschar *pp = eptr;
4177          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4178            {            {
4179            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c == md->lcc[*eptr]) break;
4180            eptr++;            eptr++;
4181            }            }
4182          while (eptr >= pp)          while (eptr >= pp)
# Line 3752  for (;;) Line 4270  for (;;)
4270    
4271        case OP_NOT_DIGIT:        case OP_NOT_DIGIT:
4272        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
4273          if ((pcre_ctypes[*eptr++] & ctype_digit) != 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_digit) != 0) return FALSE;
4274        break;        break;
4275    
4276        case OP_DIGIT:        case OP_DIGIT:
4277        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
4278          if ((pcre_ctypes[*eptr++] & ctype_digit) == 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_digit) == 0) return FALSE;
4279        break;        break;
4280    
4281        case OP_NOT_WHITESPACE:        case OP_NOT_WHITESPACE:
4282        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
4283          if ((pcre_ctypes[*eptr++] & ctype_space) != 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_space) != 0) return FALSE;
4284        break;        break;
4285    
4286        case OP_WHITESPACE:        case OP_WHITESPACE:
4287        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
4288          if ((pcre_ctypes[*eptr++] & ctype_space) == 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_space) == 0) return FALSE;
4289        break;        break;
4290    
4291        case OP_NOT_WORDCHAR:        case OP_NOT_WORDCHAR:
4292        for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) != 0)        for (i = 1; i <= min; i++)
4293          return FALSE;          if ((md->ctypes[*eptr++] & ctype_word) != 0)
4294              return FALSE;
4295        break;        break;
4296    
4297        case OP_WORDCHAR:        case OP_WORDCHAR:
4298        for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) == 0)        for (i = 1; i <= min; i++)
4299          return FALSE;          if ((md->ctypes[*eptr++] & ctype_word) == 0)
4300              return FALSE;
4301        break;        break;
4302        }        }
4303    
# Line 3786  for (;;) Line 4306  for (;;)
4306      if (min == max) continue;      if (min == max) continue;
4307    
4308      /* If minimizing, we have to test the rest of the pattern before each      /* If minimizing, we have to test the rest of the pattern before each
4309      subsequent match, so inlining isn't much help; just use the function. */      subsequent match. */
4310    
4311      if (minimize)      if (minimize)
4312        {        {
4313        for (i = min;; i++)        for (i = min;; i++)
4314          {          {
4315          if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE;          if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE;
4316          if (i >= max || eptr >= md->end_subject ||          if (i >= max || eptr >= md->end_subject) return FALSE;
4317            !match_type(ctype, *eptr++, (ims & PCRE_DOTALL) != 0))  
4318              return FALSE;          c = *eptr++;
4319            switch(ctype)
4320              {
4321              case OP_ANY:
4322              if ((ims & PCRE_DOTALL) == 0 && c == '\n') return FALSE;
4323              break;
4324    
4325              case OP_NOT_DIGIT:
4326              if ((md->ctypes[c] & ctype_digit) != 0) return FALSE;
4327              break;
4328    
4329              case OP_DIGIT:
4330              if ((md->ctypes[c] & ctype_digit) == 0) return FALSE;
4331              break;
4332    
4333              case OP_NOT_WHITESPACE:
4334              if ((md->ctypes[c] & ctype_space) != 0) return FALSE;
4335              break;
4336    
4337              case OP_WHITESPACE:
4338              if  ((md->ctypes[c] & ctype_space) == 0) return FALSE;
4339              break;
4340    
4341              case OP_NOT_WORDCHAR:
4342              if ((md->ctypes[c] & ctype_word) != 0) return FALSE;
4343              break;
4344    
4345              case OP_WORDCHAR:
4346              if ((md->ctypes[c] & ctype_word) == 0) return FALSE;
4347              break;
4348              }
4349          }          }
4350        /* Control never gets here */        /* Control never gets here */
4351        }        }
# Line 3828  for (;;) Line 4378  for (;;)
4378          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
4379          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4380            {            {
4381            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
4382              break;              break;
4383            eptr++;            eptr++;
4384            }            }
# Line 3837  for (;;) Line 4387  for (;;)
4387          case OP_DIGIT:          case OP_DIGIT:
4388          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4389            {            {
4390            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
4391              break;              break;
4392            eptr++;            eptr++;
4393            }            }
# Line 3846  for (;;) Line 4396  for (;;)
4396          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
4397          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4398            {            {
4399            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
4400              break;              break;
4401            eptr++;            eptr++;
4402            }            }
# Line 3855  for (;;) Line 4405  for (;;)
4405          case OP_WHITESPACE:          case OP_WHITESPACE:
4406          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4407            {            {
4408            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
4409              break;              break;
4410            eptr++;            eptr++;
4411            }            }
# Line 3864  for (;;) Line 4414  for (;;)
4414          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
4415          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4416            {            {
4417            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
4418              break;              break;
4419            eptr++;            eptr++;
4420            }            }
# Line 3873  for (;;) Line 4423  for (;;)
4423          case OP_WORDCHAR:          case OP_WORDCHAR:
4424          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4425            {            {
4426            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
4427              break;              break;
4428            eptr++;            eptr++;
4429            }            }
# Line 3919  Arguments: Line 4469  Arguments:
4469    external_extra  points to "hints" from pcre_study() or is NULL    external_extra  points to "hints" from pcre_study() or is NULL
4470    subject         points to the subject string    subject         points to the subject string
4471    length          length of subject string (may contain binary zeros)    length          length of subject string (may contain binary zeros)
4472      start_offset    where to start in the subject string
4473    options         option bits    options         option bits
4474    offsets         points to a vector of ints to be filled in with offsets    offsets         points to a vector of ints to be filled in with offsets
4475    offsetcount     the number of elements in the vector    offsetcount     the number of elements in the vector
# Line 3931  Returns:          > 0 => success; value Line 4482  Returns:          > 0 => success; value
4482    
4483  int  int
4484  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
4485    const char *subject, int length, int options, int *offsets, int offsetcount)    const char *subject, int length, int start_offset, int options, int *offsets,
4486      int offsetcount)
4487  {  {
4488  int resetcount, ocount;  int resetcount, ocount;
4489  int first_char = -1;  int first_char = -1;
4490  int ims = 0;  int req_char = -1;
4491    int req_char2 = -1;
4492    unsigned long int ims = 0;
4493  match_data match_block;  match_data match_block;
4494  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
4495  const uschar *start_match = (const uschar *)subject;  const uschar *start_match = (const uschar *)subject + start_offset;
4496  const uschar *end_subject;  const uschar *end_subject;
4497    const uschar *req_char_ptr = start_match - 1;
4498  const real_pcre *re = (const real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
4499  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
4500  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
# Line 3952  if (re == NULL || subject == NULL || Line 4507  if (re == NULL || subject == NULL ||
4507     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
4508  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
4509    
4510    match_block.start_pattern = re->code;
4511  match_block.start_subject = (const uschar *)subject;  match_block.start_subject = (const uschar *)subject;
4512  match_block.end_subject = match_block.start_subject + length;  match_block.end_subject = match_block.start_subject + length;
4513  end_subject = match_block.end_subject;  end_subject = match_block.end_subject;
# Line 3960  match_block.endonly = (re->options & PCR Line 4516  match_block.endonly = (re->options & PCR
4516    
4517  match_block.notbol = (options & PCRE_NOTBOL) != 0;  match_block.notbol = (options & PCRE_NOTBOL) != 0;
4518  match_block.noteol = (options & PCRE_NOTEOL) != 0;  match_block.noteol = (options & PCRE_NOTEOL) != 0;
4519    match_block.notempty = (options & PCRE_NOTEMPTY) != 0;
4520    
4521  match_block.errorcode = PCRE_ERROR_NOMATCH;     /* Default error */  match_block.errorcode = PCRE_ERROR_NOMATCH;     /* Default error */
4522    
4523    match_block.lcc = re->tables + lcc_offset;
4524    match_block.ctypes = re->tables + ctypes_offset;
4525    
4526  /* The ims options can vary during the matching as a result of the presence  /* The ims options can vary during the matching as a result of the presence
4527  of (?ims) items in the pattern. They are kept in a local variable so that  of (?ims) items in the pattern. They are kept in a local variable so that
4528  restoring at the exit of a group is easy. */  restoring at the exit of a group is easy. */
# Line 3997  in the pattern. */ Line 4557  in the pattern. */
4557  resetcount = 2 + re->top_bracket * 2;  resetcount = 2 + re->top_bracket * 2;
4558  if (resetcount > offsetcount) resetcount = ocount;  if (resetcount > offsetcount) resetcount = ocount;
4559    
4560    /* Reset the working variable associated with each extraction. These should
4561    never be used unless previously set, but they get saved and restored, and so we
4562    initialize them to avoid reading uninitialized locations. */
4563    
4564    if (match_block.offset_vector != NULL)
4565      {
4566      register int *iptr = match_block.offset_vector + ocount;
4567      register int *iend = iptr - resetcount/2 + 1;
4568      while (--iptr >= iend) *iptr = -1;
4569      }
4570    
4571  /* Set up the first character to match, if available. The first_char value is  /* Set up the first character to match, if available. The first_char value is
4572  never set for an anchored regular expression, but the anchoring may be forced  never set for an anchored regular expression, but the anchoring may be forced
4573  at run time, so we have to test for anchoring. The first char may be unset for  at run time, so we have to test for anchoring. The first char may be unset for
# Line 4008  if (!anchored) Line 4579  if (!anchored)
4579    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->options & PCRE_FIRSTSET) != 0)
4580      {      {
4581      first_char = re->first_char;      first_char = re->first_char;
4582      if ((ims & PCRE_CASELESS) != 0) first_char = pcre_lcc[first_char];      if ((ims & PCRE_CASELESS) != 0) first_char = match_block.lcc[first_char];
4583      }      }
4584    else    else
4585      if (!startline && extra != NULL &&      if (!startline && extra != NULL &&
# Line 4016  if (!anchored) Line 4587  if (!anchored)
4587          start_bits = extra->start_bits;          start_bits = extra->start_bits;
4588    }    }
4589    
4590  /* Loop for unanchored matches; for anchored regexps the loop runs just once. */  /* For anchored or unanchored matches, there may be a "last known required
4591    character" set. If the PCRE_CASELESS is set, implying that the match starts
4592    caselessly, or if there are any changes of this flag within the regex, set up
4593    both cases of the character. Otherwise set the two values the same, which will
4594    avoid duplicate testing (which takes significant time). This covers the vast
4595    majority of cases. It will be suboptimal when the case flag changes in a regex
4596    and the required character in fact is caseful. */
4597    
4598    if ((re->options & PCRE_REQCHSET) != 0)
4599      {
4600      req_char = re->req_char;
4601      req_char2 = ((re->options & (PCRE_CASELESS | PCRE_ICHANGED)) != 0)?
4602        (re->tables + fcc_offset)[req_char] : req_char;
4603      }
4604    
4605    /* Loop for handling unanchored repeated matching attempts; for anchored regexs
4606    the loop runs just once. */
4607    
4608  do  do
4609    {    {
# Line 4033  do Line 4620  do
4620    if (first_char >= 0)    if (first_char >= 0)
4621      {      {
4622      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
4623        while (start_match < end_subject && pcre_lcc[*start_match] != first_char)        while (start_match < end_subject &&
4624                 match_block.lcc[*start_match] != first_char)
4625          start_match++;          start_match++;
4626      else      else
4627        while (start_match < end_subject && *start_match != first_char)        while (start_match < end_subject && *start_match != first_char)
# Line 4044  do Line 4632  do
4632    
4633    else if (startline)    else if (startline)
4634      {      {
4635      if (start_match > match_block.start_subject)      if (start_match > match_block.start_subject + start_offset)
4636        {        {
4637        while (start_match < end_subject && start_match[-1] != '\n')        while (start_match < end_subject && start_match[-1] != '\n')
4638          start_match++;          start_match++;
4639        }        }
4640      }      }
4641    
4642    /* Or to a non-unique first char */    /* Or to a non-unique first char after study */
4643    
4644    else if (start_bits != NULL)    else if (start_bits != NULL)
4645      {      {
# Line 4068  do Line 4656  do
4656    printf("\n");    printf("\n");
4657  #endif  #endif
4658    
4659      /* If req_char is set, we know that that character must appear in the subject
4660      for the match to succeed. If the first character is set, req_char must be
4661      later in the subject; otherwise the test starts at the match point. This
4662      optimization can save a huge amount of backtracking in patterns with nested
4663      unlimited repeats that aren't going to match. We don't know what the state of
4664      case matching may be when this character is hit, so test for it in both its
4665      cases if necessary. However, the different cased versions will not be set up
4666      unless PCRE_CASELESS was given or the casing state changes within the regex.
4667      Writing separate code makes it go faster, as does using an autoincrement and
4668      backing off on a match. */
4669    
4670      if (req_char >= 0)
4671        {
4672        register const uschar *p = start_match + ((first_char >= 0)? 1 : 0);
4673    
4674        /* We don't need to repeat the search if we haven't yet reached the
4675        place we found it at last time. */
4676    
4677        if (p > req_char_ptr)
4678          {
4679          /* Do a single test if no case difference is set up */
4680    
4681          if (req_char == req_char2)
4682            {
4683            while (p < end_subject)
4684              {
4685              if (*p++ == req_char) { p--; break; }
4686              }
4687            }
4688    
4689          /* Otherwise test for either case */
4690    
4691          else
4692            {
4693            while (p < end_subject)
4694              {
4695              register int pp = *p++;
4696              if (pp == req_char || pp == req_char2) { p--; break; }
4697              }
4698            }
4699    
4700          /* If we can't find the required character, break the matching loop */
4701    
4702          if (p >= end_subject) break;
4703    
4704          /* If we have found the required character, save the point where we
4705          found it, so that we don't search again next time round the loop if
4706          the start hasn't passed this character yet. */
4707    
4708          req_char_ptr = p;
4709          }
4710        }
4711    
4712    /* When a match occurs, substrings will be set for all internal extractions;    /* When a match occurs, substrings will be set for all internal extractions;
4713    we just need to set up the whole thing as substring 0 before returning. If    we just need to set up the whole thing as substring 0 before returning. If
4714    there were too many extractions, set the return code to zero. In the case    there were too many extractions, set the return code to zero. In the case
# Line 4075  do Line 4716  do
4716    those back references that we can. In this case there need not be overflow    those back references that we can. In this case there need not be overflow
4717    if certain parts of the pattern were not used. */    if certain parts of the pattern were not used. */
4718    
4719      match_block.start_match = start_match;
4720    if (!match(start_match, re->code, 2, &match_block, ims, FALSE, start_match))    if (!match(start_match, re->code, 2, &match_block, ims, FALSE, start_match))
4721      continue;      continue;
4722    
# Line 4106  do Line 4748  do
4748    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
4749    return rc;    return rc;
4750    }    }
4751    
4752    /* This "while" is the end of the "do" above */
4753    
4754  while (!anchored &&  while (!anchored &&
4755         match_block.errorcode == PCRE_ERROR_NOMATCH &&         match_block.errorcode == PCRE_ERROR_NOMATCH &&
4756         start_match++ < end_subject);         start_match++ < end_subject);

Legend:
Removed from v.23  
changed lines
  Added in v.43

  ViewVC Help
Powered by ViewVC 1.1.5