/[pcre]/code/trunk/pcre.c
ViewVC logotype

Diff of /code/trunk/pcre.c

Parent Directory | Revision Log | View Patch

revision 39 by nigel, Sat Feb 24 21:39:13 2007 UTC revision 47 by nigel, Sat Feb 24 21:39:29 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1997-1999 University of Cambridge             Copyright (c) 1997-2000 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 82  static const char *OP_names[] = { Line 82  static const char *OP_names[] = {
82    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
83    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
84    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
85    "class", "Ref",    "class", "Ref", "Recurse",
86    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
87    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
88    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
# Line 107  static const short int escapes[] = { Line 107  static const short int escapes[] = {
107      0,      0, -ESC_z                                            /* x - z */      0,      0, -ESC_z                                            /* x - z */
108  };  };
109    
110    /* Tables of names of POSIX character classes and their lengths. The list is
111    terminated by a zero length entry. The first three must be alpha, lower, upper,
112    as this is assumed for handling case independence. */
113    
114    static const char *posix_names[] = {
115      "alpha", "lower", "upper",   /* indices 0-2: lower/upper are folded to alpha (index 0) when PCRE_CASELESS is set */
116      "alnum", "ascii", "cntrl", "digit", "graph",
117      "print", "punct", "space", "word",  "xdigit" };   /* no terminator here; the 0 that ends the list is in posix_name_lengths */
118    
119    static const uschar posix_name_lengths[] = {   /* length of each posix_names entry, in the same order */
120      5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };   /* the final 0 is what stops the scan in check_posix_name() */
121    
122    /* Table of class bit maps for each POSIX class; up to three may be combined
123    to form the class. */
124    
125    static const int posix_class_maps[] = {   /* three slots per class, rows in posix_names order; a negative slot ends the row, others are offsets into cd->cbits */
126      cbit_lower, cbit_upper, -1,             /* alpha */
127      cbit_lower, -1,         -1,             /* lower */
128      cbit_upper, -1,         -1,             /* upper */
129      cbit_digit, cbit_lower, cbit_upper,     /* alnum */
130      cbit_print, cbit_cntrl, -1,             /* ascii */
131      cbit_cntrl, -1,         -1,             /* cntrl */
132      cbit_digit, -1,         -1,             /* digit */
133      cbit_graph, -1,         -1,             /* graph */
134      cbit_print, -1,         -1,             /* print */
135      cbit_punct, -1,         -1,             /* punct */
136      cbit_space, -1,         -1,             /* space */
137      cbit_word,  -1,         -1,             /* word */
138      cbit_xdigit,-1,         -1              /* xdigit */
139    };
140    
141    
142  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
143    
144  static BOOL  static BOOL
145    compile_regex(int, int, int *, uschar **, const uschar **, const char **,    compile_regex(int, int, int *, uschar **, const uschar **, const char **,
146      BOOL, int, int *, int *, compile_data *);      BOOL, int, int *, int *, compile_data *);
147    
148    /* Structure for building a chain of data that actually lives on the
149    stack, for holding the values of the subject pointer at the start of each
150    subpattern, so as to detect when an empty string has been matched by a
151    subpattern - to break infinite loops. */
152    
153    typedef struct eptrblock {
154      struct eptrblock *prev;   /* block that was current when this group was entered (match() stores its incoming eptrb here) */
155      const uschar *saved_eptr;   /* subject pointer recorded at the start of the group */
156    } eptrblock;
157    
158    /* Flag bits for the match() function */
159    
160    #define match_condassert   0x01    /* Called to check a condition assertion */
161    #define match_isgroup      0x02    /* Set if start of bracketed group */
162    
163    
164    
165  /*************************************************  /*************************************************
# Line 161  return XSTRING(PCRE_MAJOR) "." XSTRING(P Line 208  return XSTRING(PCRE_MAJOR) "." XSTRING(P
208    
209    
210  /*************************************************  /*************************************************
211  *       Return info about a compiled pattern     *  * (Obsolete) Return info about compiled pattern  *
212  *************************************************/  *************************************************/
213    
214  /* This function picks potentially useful data out of the private  /* This is the original "info" function. It picks potentially useful data out
215  structure. The public options are passed back in an int - though the  of the private structure, but its interface was too rigid. It remains for
216  re->options field has been expanded to a long int, all the public options  backwards compatibility. The public options are passed back in an int - though
217    the re->options field has been expanded to a long int, all the public options
218  at the low end of it, and so even on 16-bit systems this will still be OK.  at the low end of it, and so even on 16-bit systems this will still be OK.
219  Therefore, I haven't changed the API for pcre_info().  Therefore, I haven't changed the API for pcre_info().
220    
# Line 177  Arguments: Line 225  Arguments:
225                  or -1 if multiline and all branches start ^,                  or -1 if multiline and all branches start ^,
226                  or -2 otherwise                  or -2 otherwise
227    
228  Returns:        number of identifying extraction brackets  Returns:        number of capturing subpatterns
229                  or negative values on error                  or negative values on error
230  */  */
231    
# Line 196  return re->top_bracket; Line 244  return re->top_bracket;
244    
245    
246    
247    /*************************************************
248    *        Return info about compiled pattern      *
249    *************************************************/
250    
251    /* This is a newer "info" function which has an extensible interface so
252    that additional items can be added compatibly.
253    
254    Arguments:
255      external_re      points to compiled code
256      study_data       points to study data, or NULL
257      what             what information is required
258      where            where to put the information
259    
260    Returns:           0 if data returned, negative on error
261    */
262    
263    int   /* 0 when the item was stored through 'where', otherwise one of the PCRE_ERROR_* codes returned below */
264    pcre_fullinfo(const pcre *external_re, const pcre_extra *study_data, int what,
265      void *where)
266    {
267    const real_pcre *re = (const real_pcre *)external_re;   /* internal view of the caller's opaque pattern pointer */
268    const real_pcre_extra *study = (const real_pcre_extra *)study_data;   /* may be NULL; only read for PCRE_INFO_FIRSTTABLE */
269    
270    if (re == NULL || where == NULL) return PCRE_ERROR_NULL;   /* study_data is the only pointer allowed to be NULL */
271    if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;   /* block does not carry the expected magic number */
272    
273    switch (what)   /* the type written through 'where' depends on 'what'; the caller must pass a matching pointer */
274      {
275      case PCRE_INFO_OPTIONS:   /* where -> unsigned long int: options masked with PUBLIC_OPTIONS */
276      *((unsigned long int *)where) = re->options & PUBLIC_OPTIONS;
277      break;
278    
279      case PCRE_INFO_SIZE:   /* where -> size_t: the re->size field */
280      *((size_t *)where) = re->size;
281      break;
282    
283      case PCRE_INFO_CAPTURECOUNT:   /* where -> int: re->top_bracket */
284      *((int *)where) = re->top_bracket;
285      break;
286    
287      case PCRE_INFO_BACKREFMAX:   /* where -> int: re->top_backref */
288      *((int *)where) = re->top_backref;
289      break;
290    
291      case PCRE_INFO_FIRSTCHAR:   /* where -> int: first_char if PCRE_FIRSTSET, else -1 if PCRE_STARTLINE, else -2 */
292      *((int *)where) =
293        ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :
294        ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
295      break;
296    
297      case PCRE_INFO_FIRSTTABLE:   /* where -> const uschar *: study->start_bits, or NULL without mapped study data */
298      *((const uschar **)where) =
299        (study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)?
300          study->start_bits : NULL;
301      break;
302    
303      case PCRE_INFO_LASTLITERAL:   /* where -> int: req_char if PCRE_REQCHSET is set, else -1 */
304      *((int *)where) =
305        ((re->options & PCRE_REQCHSET) != 0)? re->req_char : -1;
306      break;
307    
308      default: return PCRE_ERROR_BADOPTION;   /* unrecognised 'what'; nothing is written to 'where' */
309      }
310    
311    return 0;
312    }
313    
314    
315    
316  #ifdef DEBUG  #ifdef DEBUG
317  /*************************************************  /*************************************************
# Line 255  check_escape(const uschar **ptrptr, cons Line 371  check_escape(const uschar **ptrptr, cons
371    int options, BOOL isclass, compile_data *cd)    int options, BOOL isclass, compile_data *cd)
372  {  {
373  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
374  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */  int c, i;
 int i;  
375    
376    c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */
377  if (c == 0) *errorptr = ERR1;  if (c == 0) *errorptr = ERR1;
378    
379  /* Digits or letters may have special meaning; all others are literals. */  /* Digits or letters may have special meaning; all others are literals. */
# Line 622  for (;;) Line 738  for (;;)
738    
739    
740  /*************************************************  /*************************************************
741    *           Check for POSIX class syntax         *
742    *************************************************/
743    
744    /* This function is called when the sequence "[:" or "[." or "[=" is
745    encountered in a character class. It checks whether this is followed by an
746    optional ^ and then a sequence of letters, terminated by a matching ":]" or
747    ".]" or "=]".
748    
749    Arguments:
750      ptr      pointer to the initial [
751      endptr   where to return the end pointer
752      cd       pointer to compile data
753    
754    Returns:   TRUE or FALSE
755    */
756    
757    static BOOL
758    check_posix_syntax(const uschar *ptr, const uschar **endptr, compile_data *cd)
759    {
760    int terminator;          /* Don't combine these lines; the Solaris cc */
761    terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
762    if (*(++ptr) == '^') ptr++;   /* step over the optional negation */
763    while ((cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;   /* skip the run of letters that forms the name */
764    if (*ptr == terminator && ptr[1] == ']')   /* must close with the same character that followed the '[' */
765      {
766      *endptr = ptr;   /* left pointing at the terminator character, not at the ']' */
767      return TRUE;
768      }
769    return FALSE;   /* *endptr is not written on failure */
770    }
771    
772    
773    
774    
775    /*************************************************
776    *          Check POSIX class name                *
777    *************************************************/
778    
779    /* This function is called to check the name given in a POSIX-style class entry
780    such as [:alnum:].
781    
782    Arguments:
783      ptr        points to the first letter
784      len        the length of the name
785    
786    Returns:     a value representing the name, or -1 if unknown
787    */
788    
789    static int
790    check_posix_name(const uschar *ptr, int len)
791    {
792    register int yield = 0;   /* index into posix_names / posix_name_lengths, and the value returned on a match */
793    while (posix_name_lengths[yield] != 0)   /* the zero length entry ends the table */
794      {
795      if (len == posix_name_lengths[yield] &&   /* lengths must agree first, so the strncmp below is a whole-name match */
796        strncmp((const char *)ptr, posix_names[yield], len) == 0) return yield;
797      yield++;
798      }
799    return -1;   /* no table entry has this name */
800    }
801    
802    
803    
804    
805    /*************************************************
806  *           Compile one branch                   *  *           Compile one branch                   *
807  *************************************************/  *************************************************/
808    
# Line 689  for (;; ptr++) Line 870  for (;; ptr++)
870      if ((cd->ctypes[c] & ctype_space) != 0) continue;      if ((cd->ctypes[c] & ctype_space) != 0) continue;
871      if (c == '#')      if (c == '#')
872        {        {
873        while ((c = *(++ptr)) != 0 && c != '\n');        /* The space before the ; is to avoid a warning on a silly compiler
874          on the Macintosh. */
875          while ((c = *(++ptr)) != 0 && c != '\n') ;
876        continue;        continue;
877        }        }
878      }      }
# Line 764  for (;; ptr++) Line 947  for (;; ptr++)
947          goto FAILED;          goto FAILED;
948          }          }
949    
950          /* Handle POSIX class names. Perl allows a negation extension of the
951          form [:^name:]. A square bracket that doesn't match the syntax is
952          treated as a literal. We also recognize the POSIX constructions
953          [.ch.] and [=ch=] ("collating elements") and fault them, as Perl
954          5.6 does. */
955    
956          if (c == '[' &&
957              (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
958              check_posix_syntax(ptr, &tempptr, cd))
959            {
960            BOOL local_negate = FALSE;
961            int posix_class, i;
962            register const uschar *cbits = cd->cbits;
963    
964            if (ptr[1] != ':')
965              {
966              *errorptr = ERR31;
967              goto FAILED;
968              }
969    
970            ptr += 2;
971            if (*ptr == '^')
972              {
973              local_negate = TRUE;
974              ptr++;
975              }
976    
977            posix_class = check_posix_name(ptr, tempptr - ptr);
978            if (posix_class < 0)
979              {
980              *errorptr = ERR30;
981              goto FAILED;
982              }
983    
984            /* If matching is caseless, upper and lower are converted to
985            alpha. This relies on the fact that the class table starts with
986            alpha, lower, upper as the first 3 entries. */
987    
988            if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)
989              posix_class = 0;
990    
991            /* OR into the map we are building: up to 3 of the static class
992            tables, or their negations. */
993    
994            posix_class *= 3;
995            for (i = 0; i < 3; i++)
996              {
997              int taboffset = posix_class_maps[posix_class + i];
998              if (taboffset < 0) break;
999              if (local_negate)
1000                for (c = 0; c < 32; c++) class[c] |= ~cbits[c+taboffset];
1001              else
1002                for (c = 0; c < 32; c++) class[c] |= cbits[c+taboffset];
1003              }
1004    
1005            ptr = tempptr + 1;
1006            class_charcount = 10;  /* Set > 1; assumes more than 1 per class */
1007            continue;
1008            }
1009    
1010        /* Backslash may introduce a single character, or it may introduce one        /* Backslash may introduce a single character, or it may introduce one
1011        of the specials, which just set a flag. Escaped items are checked for        of the specials, which just set a flag. Escaped items are checked for
1012        validity in the pre-compiling pass. The sequence \b is a special case.        validity in the pre-compiling pass. The sequence \b is a special case.
# Line 791  for (;; ptr++) Line 1034  for (;; ptr++)
1034              continue;              continue;
1035    
1036              case ESC_w:              case ESC_w:
1037              for (c = 0; c < 32; c++)              for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_word];
               class[c] |= (cbits[c+cbit_digit] | cbits[c+cbit_word]);  
1038              continue;              continue;
1039    
1040              case ESC_W:              case ESC_W:
1041              for (c = 0; c < 32; c++)              for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_word];
               class[c] |= ~(cbits[c+cbit_digit] | cbits[c+cbit_word]);  
1042              continue;              continue;
1043    
1044              case ESC_s:              case ESC_s:
# Line 1360  for (;; ptr++) Line 1601  for (;; ptr++)
1601          ptr++;          ptr++;
1602          break;          break;
1603    
1604            case 'R':                 /* Pattern recursion */
1605            *code++ = OP_RECURSE;
1606            ptr++;
1607            continue;
1608    
1609          default:                  /* Option setting */          default:                  /* Option setting */
1610          set = unset = 0;          set = unset = 0;
1611          optset = &set;          optset = &set;
# Line 1566  for (;; ptr++) Line 1812  for (;; ptr++)
1812          if ((cd->ctypes[c] & ctype_space) != 0) continue;          if ((cd->ctypes[c] & ctype_space) != 0) continue;
1813          if (c == '#')          if (c == '#')
1814            {            {
1815            while ((c = *(++ptr)) != 0 && c != '\n');            /* The space before the ; is to avoid a warning on a silly compiler
1816              on the Macintosh. */
1817              while ((c = *(++ptr)) != 0 && c != '\n') ;
1818            if (c == 0) break;            if (c == 0) break;
1819            continue;            continue;
1820            }            }
# Line 2015  pcre_compile(const char *pattern, int op Line 2263  pcre_compile(const char *pattern, int op
2263  real_pcre *re;  real_pcre *re;
2264  int length = 3;      /* For initial BRA plus length */  int length = 3;      /* For initial BRA plus length */
2265  int runlength;  int runlength;
2266  int c, size, reqchar, countlits;  int c, reqchar, countlits;
2267  int bracount = 0;  int bracount = 0;
2268  int top_backref = 0;  int top_backref = 0;
2269  int branch_extra = 0;  int branch_extra = 0;
2270  int branch_newextra;  int branch_newextra;
2271  unsigned int brastackptr = 0;  unsigned int brastackptr = 0;
2272    size_t size;
2273  uschar *code;  uschar *code;
2274  const uschar *ptr;  const uschar *ptr;
2275  compile_data compile_block;  compile_data compile_block;
# Line 2083  while ((c = *(++ptr)) != 0) Line 2332  while ((c = *(++ptr)) != 0)
2332      if ((compile_block.ctypes[c] & ctype_space) != 0) continue;      if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2333      if (c == '#')      if (c == '#')
2334        {        {
2335        while ((c = *(++ptr)) != 0 && c != '\n');        /* The space before the ; is to avoid a warning on a silly compiler
2336          on the Macintosh. */
2337          while ((c = *(++ptr)) != 0 && c != '\n') ;
2338        continue;        continue;
2339        }        }
2340      }      }
# Line 2248  while ((c = *(++ptr)) != 0) Line 2499  while ((c = *(++ptr)) != 0)
2499          ptr += 2;          ptr += 2;
2500          break;          break;
2501    
2502            /* A recursive call to the regex is an extension, to provide the
2503            facility which can be obtained by $(?p{perl-code}) in Perl 5.6. */
2504    
2505            case 'R':
2506            if (ptr[3] != ')')
2507              {
2508              *errorptr = ERR29;
2509              goto PCRE_ERROR_RETURN;
2510              }
2511            ptr += 3;
2512            length += 1;
2513            break;
2514    
2515          /* Lookbehinds are in Perl from version 5.005 */          /* Lookbehinds are in Perl from version 5.005 */
2516    
2517          case '<':          case '<':
# Line 2280  while ((c = *(++ptr)) != 0) Line 2544  while ((c = *(++ptr)) != 0)
2544          else   /* An assertion must follow */          else   /* An assertion must follow */
2545            {            {
2546            ptr++;   /* Can treat like ':' as far as spacing is concerned */            ptr++;   /* Can treat like ':' as far as spacing is concerned */
2547              if (ptr[2] != '?' ||
2548            if (ptr[2] != '?' || strchr("=!<", ptr[3]) == NULL)               (ptr[3] != '=' && ptr[3] != '!' && ptr[3] != '<') )
2549              {              {
2550              ptr += 2;    /* To get right offset in message */              ptr += 2;    /* To get right offset in message */
2551              *errorptr = ERR28;              *errorptr = ERR28;
# Line 2494  while ((c = *(++ptr)) != 0) Line 2758  while ((c = *(++ptr)) != 0)
2758          if ((compile_block.ctypes[c] & ctype_space) != 0) continue;          if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2759          if (c == '#')          if (c == '#')
2760            {            {
2761            while ((c = *(++ptr)) != 0 && c != '\n');            /* The space before the ; is to avoid a warning on a silly compiler
2762              on the Macintosh. */
2763              while ((c = *(++ptr)) != 0 && c != '\n') ;
2764            continue;            continue;
2765            }            }
2766          }          }
# Line 2550  if (re == NULL) Line 2816  if (re == NULL)
2816    return NULL;    return NULL;
2817    }    }
2818    
2819  /* Put in the magic number and the options. */  /* Put in the magic number, and save the size, options, and table pointer */
2820    
2821  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
2822    re->size = size;
2823  re->options = options;  re->options = options;
2824  re->tables = tables;  re->tables = tables;
2825    
# Line 2951  Arguments: Line 3218  Arguments:
3218     offset_top  current top pointer     offset_top  current top pointer
3219     md          pointer to "static" info for the match     md          pointer to "static" info for the match
3220     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
3221     condassert  TRUE if called to check a condition assertion     eptrb       pointer to chain of blocks containing eptr at start of
3222     eptrb       eptr at start of last bracket                   brackets - for testing for empty matches
3223       flags       can contain
3224                     match_condassert - this is an assertion condition
3225                     match_isgroup - this is the start of a bracketed group
3226    
3227  Returns:       TRUE if matched  Returns:       TRUE if matched
3228  */  */
3229    
3230  static BOOL  static BOOL
3231  match(register const uschar *eptr, register const uschar *ecode,  match(register const uschar *eptr, register const uschar *ecode,
3232    int offset_top, match_data *md, unsigned long int ims, BOOL condassert,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
3233    const uschar *eptrb)    int flags)
3234  {  {
3235  unsigned long int original_ims = ims;   /* Save for resetting on ')' */  unsigned long int original_ims = ims;   /* Save for resetting on ')' */
3236    eptrblock newptrb;
3237    
3238    /* At the start of a bracketed group, add the current subject pointer to the
3239    stack of such pointers, to be re-instated at the end of the group when we hit
3240    the closing ket. When match() is called in other circumstances, we don't add to
3241    the stack. */
3242    
3243    if ((flags & match_isgroup) != 0)
3244      {
3245      newptrb.prev = eptrb;
3246      newptrb.saved_eptr = eptr;
3247      eptrb = &newptrb;
3248      }
3249    
3250    /* Now start processing the operations. */
3251    
3252  for (;;)  for (;;)
3253    {    {
# Line 3008  for (;;) Line 3293  for (;;)
3293    
3294        do        do
3295          {          {
3296          if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;          if (match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup))
3297              return TRUE;
3298          ecode += (ecode[1] << 8) + ecode[2];          ecode += (ecode[1] << 8) + ecode[2];
3299          }          }
3300        while (*ecode == OP_ALT);        while (*ecode == OP_ALT);
# Line 3034  for (;;) Line 3320  for (;;)
3320      DPRINTF(("start bracket 0\n"));      DPRINTF(("start bracket 0\n"));
3321      do      do
3322        {        {
3323        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;        if (match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup))
3324            return TRUE;
3325        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
3326        }        }
3327      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 3053  for (;;) Line 3340  for (;;)
3340        return match(eptr,        return match(eptr,
3341          ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)?          ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)?
3342            5 : 3 + (ecode[1] << 8) + ecode[2]),            5 : 3 + (ecode[1] << 8) + ecode[2]),
3343          offset_top, md, ims, FALSE, eptr);          offset_top, md, ims, eptrb, match_isgroup);
3344        }        }
3345    
3346      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
# Line 3061  for (;;) Line 3348  for (;;)
3348    
3349      else      else
3350        {        {
3351        if (match(eptr, ecode+3, offset_top, md, ims, TRUE, NULL))        if (match(eptr, ecode+3, offset_top, md, ims, NULL,
3352              match_condassert | match_isgroup))
3353          {          {
3354          ecode += 3 + (ecode[4] << 8) + ecode[5];          ecode += 3 + (ecode[4] << 8) + ecode[5];
3355          while (*ecode == OP_ALT) ecode += (ecode[1] << 8) + ecode[2];          while (*ecode == OP_ALT) ecode += (ecode[1] << 8) + ecode[2];
3356          }          }
3357        else ecode += (ecode[1] << 8) + ecode[2];        else ecode += (ecode[1] << 8) + ecode[2];
3358        return match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr);        return match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup);
3359        }        }
3360      /* Control never reaches here */      /* Control never reaches here */
3361    
# Line 3104  for (;;) Line 3392  for (;;)
3392      case OP_ASSERTBACK:      case OP_ASSERTBACK:
3393      do      do
3394        {        {
3395        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, NULL)) break;        if (match(eptr, ecode+3, offset_top, md, ims, NULL, match_isgroup)) break;
3396        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
3397        }        }
3398      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 3112  for (;;) Line 3400  for (;;)
3400    
3401      /* If checking an assertion for a condition, return TRUE. */      /* If checking an assertion for a condition, return TRUE. */
3402    
3403      if (condassert) return TRUE;      if ((flags & match_condassert) != 0) return TRUE;
3404    
3405      /* Continue from after the assertion, updating the offsets high water      /* Continue from after the assertion, updating the offsets high water
3406      mark, since extracts may have been taken during the assertion. */      mark, since extracts may have been taken during the assertion. */
# Line 3128  for (;;) Line 3416  for (;;)
3416      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
3417      do      do
3418        {        {
3419        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, NULL)) return FALSE;        if (match(eptr, ecode+3, offset_top, md, ims, NULL, match_isgroup))
3420            return FALSE;
3421        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
3422        }        }
3423      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
3424    
3425      if (condassert) return TRUE;      if ((flags & match_condassert) != 0) return TRUE;
3426    
3427      ecode += 3;      ecode += 3;
3428      continue;      continue;
3429    
# Line 3147  for (;;) Line 3437  for (;;)
3437      ecode += 3;      ecode += 3;
3438      break;      break;
3439    
3440        /* Recursion matches the current regex, nested. If there are any capturing
3441        brackets started but not finished, we have to save their starting points
3442        and reinstate them after the recursion. However, we don't know how many
3443        such there are (offset_top records the completed total) so we just have
3444        to save all the potential data. There may be up to 99 such values, which
3445        is a bit large to put on the stack, but using malloc for small numbers
3446        seems expensive. As a compromise, the stack is used when there are fewer
3447        than 16 values to store; otherwise malloc is used. A problem is what to do
3448        if the malloc fails ... there is no way of returning to the top level with
3449        an error. Save the top 15 values on the stack, and accept that the rest
3450        may be wrong. */
3451    
3452        case OP_RECURSE:
3453          {
3454          BOOL rc;
3455          int *save;
3456          int stacksave[15];
3457    
3458          c = md->offset_max;
3459    
3460          if (c < 16) save = stacksave; else
3461            {
3462            save = (int *)(pcre_malloc)((c+1) * sizeof(int));
3463            if (save == NULL)
3464              {
3465              save = stacksave;
3466              c = 15;
3467              }
3468            }
3469    
3470          for (i = 1; i <= c; i++)
3471            save[i] = md->offset_vector[md->offset_end - i];
3472          rc = match(eptr, md->start_pattern, offset_top, md, ims, eptrb,
3473            match_isgroup);
3474          for (i = 1; i <= c; i++)
3475            md->offset_vector[md->offset_end - i] = save[i];
3476          if (save != stacksave) (pcre_free)(save);
3477          if (!rc) return FALSE;
3478    
3479          /* In case the recursion has set more capturing values, save the final
3480          number, then move along the subject till after the recursive match,
3481          and advance one byte in the pattern code. */
3482    
3483          offset_top = md->end_offset_top;
3484          eptr = md->end_match_ptr;
3485          ecode++;
3486          }
3487        break;
3488    
3489      /* "Once" brackets are like assertion brackets except that after a match,      /* "Once" brackets are like assertion brackets except that after a match,
3490      the point in the subject string is not moved back. Thus there can never be      the point in the subject string is not moved back. Thus there can never be
# Line 3158  for (;;) Line 3496  for (;;)
3496      case OP_ONCE:      case OP_ONCE:
3497        {        {
3498        const uschar *prev = ecode;        const uschar *prev = ecode;
3499          const uschar *saved_eptr = eptr;
3500    
3501        do        do
3502          {          {
3503          if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) break;          if (match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup))
3504              break;
3505          ecode += (ecode[1] << 8) + ecode[2];          ecode += (ecode[1] << 8) + ecode[2];
3506          }          }
3507        while (*ecode == OP_ALT);        while (*ecode == OP_ALT);
# Line 3184  for (;;) Line 3524  for (;;)
3524        5.005. If there is an options reset, it will get obeyed in the normal        5.005. If there is an options reset, it will get obeyed in the normal
3525        course of events. */        course of events. */
3526    
3527        if (*ecode == OP_KET || eptr == eptrb)        if (*ecode == OP_KET || eptr == saved_eptr)
3528          {          {
3529          ecode += 3;          ecode += 3;
3530          break;          break;
# Line 3203  for (;;) Line 3543  for (;;)
3543    
3544        if (*ecode == OP_KETRMIN)        if (*ecode == OP_KETRMIN)
3545          {          {
3546          if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr) ||          if (match(eptr, ecode+3, offset_top, md, ims, eptrb, 0) ||
3547              match(eptr, prev, offset_top, md, ims, FALSE, eptr)) return TRUE;              match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup))
3548                  return TRUE;
3549          }          }
3550        else  /* OP_KETRMAX */        else  /* OP_KETRMAX */
3551          {          {
3552          if (match(eptr, prev, offset_top, md, ims, FALSE, eptr) ||          if (match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup) ||
3553              match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;              match(eptr, ecode+3, offset_top, md, ims, eptrb, 0)) return TRUE;
3554          }          }
3555        }        }
3556      return FALSE;      return FALSE;
# Line 3230  for (;;) Line 3571  for (;;)
3571      case OP_BRAZERO:      case OP_BRAZERO:
3572        {        {
3573        const uschar *next = ecode+1;        const uschar *next = ecode+1;
3574        if (match(eptr, next, offset_top, md, ims, FALSE, eptr)) return TRUE;        if (match(eptr, next, offset_top, md, ims, eptrb, match_isgroup))
3575            return TRUE;
3576        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3577        ecode = next + 3;        ecode = next + 3;
3578        }        }
# Line 3240  for (;;) Line 3582  for (;;)
3582        {        {
3583        const uschar *next = ecode+1;        const uschar *next = ecode+1;
3584        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3585        if (match(eptr, next+3, offset_top, md, ims, FALSE, eptr)) return TRUE;        if (match(eptr, next+3, offset_top, md, ims, eptrb, match_isgroup))
3586            return TRUE;
3587        ecode++;        ecode++;
3588        }        }
3589      break;      break;
# Line 3255  for (;;) Line 3598  for (;;)
3598      case OP_KETRMAX:      case OP_KETRMAX:
3599        {        {
3600        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
3601          const uschar *saved_eptr = eptrb->saved_eptr;
3602    
3603          eptrb = eptrb->prev;    /* Back up the stack of bracket start pointers */
3604    
3605        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
3606            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
# Line 3274  for (;;) Line 3620  for (;;)
3620          int number = *prev - OP_BRA;          int number = *prev - OP_BRA;
3621          int offset = number << 1;          int offset = number << 1;
3622    
3623          DPRINTF(("end bracket %d\n", number));  #ifdef DEBUG
3624            printf("end bracket %d", number);
3625            printf("\n");
3626    #endif
3627    
3628          if (number > 0)          if (number > 0)
3629            {            {
# Line 3300  for (;;) Line 3649  for (;;)
3649        5.005. If there is an options reset, it will get obeyed in the normal        5.005. If there is an options reset, it will get obeyed in the normal
3650        course of events. */        course of events. */
3651    
3652        if (*ecode == OP_KET || eptr == eptrb)        if (*ecode == OP_KET || eptr == saved_eptr)
3653          {          {
3654          ecode += 3;          ecode += 3;
3655          break;          break;
# Line 3311  for (;;) Line 3660  for (;;)
3660    
3661        if (*ecode == OP_KETRMIN)        if (*ecode == OP_KETRMIN)
3662          {          {
3663          if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr) ||          if (match(eptr, ecode+3, offset_top, md, ims, eptrb, 0) ||
3664              match(eptr, prev, offset_top, md, ims, FALSE, eptr)) return TRUE;              match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup))
3665                  return TRUE;
3666          }          }
3667        else  /* OP_KETRMAX */        else  /* OP_KETRMAX */
3668          {          {
3669          if (match(eptr, prev, offset_top, md, ims, FALSE, eptr) ||          if (match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup) ||
3670              match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;              match(eptr, ecode+3, offset_top, md, ims, eptrb, 0)) return TRUE;
3671          }          }
3672        }        }
3673      return FALSE;      return FALSE;
# Line 3528  for (;;) Line 3878  for (;;)
3878          {          {
3879          for (i = min;; i++)          for (i = min;; i++)
3880            {            {
3881            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))            if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
3882              return TRUE;              return TRUE;
3883            if (i >= max || !match_ref(offset, eptr, length, md, ims))            if (i >= max || !match_ref(offset, eptr, length, md, ims))
3884              return FALSE;              return FALSE;
# Line 3549  for (;;) Line 3899  for (;;)
3899            }            }
3900          while (eptr >= pp)          while (eptr >= pp)
3901            {            {
3902            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))            if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
3903              return TRUE;              return TRUE;
3904            eptr -= length;            eptr -= length;
3905            }            }
# Line 3620  for (;;) Line 3970  for (;;)
3970          {          {
3971          for (i = min;; i++)          for (i = min;; i++)
3972            {            {
3973            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))            if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
3974              return TRUE;              return TRUE;
3975            if (i >= max || eptr >= md->end_subject) return FALSE;            if (i >= max || eptr >= md->end_subject) return FALSE;
3976            c = *eptr++;            c = *eptr++;
# Line 3644  for (;;) Line 3994  for (;;)
3994            }            }
3995    
3996          while (eptr >= pp)          while (eptr >= pp)
3997            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))            if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
3998              return TRUE;              return TRUE;
3999          return FALSE;          return FALSE;
4000          }          }
# Line 3741  for (;;) Line 4091  for (;;)
4091          {          {
4092          for (i = min;; i++)          for (i = min;; i++)
4093            {            {
4094            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))            if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
4095              return TRUE;              return TRUE;
4096            if (i >= max || eptr >= md->end_subject ||            if (i >= max || eptr >= md->end_subject ||
4097                c != md->lcc[*eptr++])                c != md->lcc[*eptr++])
# Line 3758  for (;;) Line 4108  for (;;)
4108            eptr++;            eptr++;
4109            }            }
4110          while (eptr >= pp)          while (eptr >= pp)
4111            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))            if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
4112              return TRUE;              return TRUE;
4113          return FALSE;          return FALSE;
4114          }          }
# Line 3775  for (;;) Line 4125  for (;;)
4125          {          {
4126          for (i = min;; i++)          for (i = min;; i++)
4127            {            {
4128            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))            if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
4129              return TRUE;              return TRUE;
4130            if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;            if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;
4131            }            }
# Line 3790  for (;;) Line 4140  for (;;)
4140            eptr++;            eptr++;
4141            }            }
4142          while (eptr >= pp)          while (eptr >= pp)
4143           if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))           if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
4144             return TRUE;             return TRUE;
4145          return FALSE;          return FALSE;
4146          }          }
# Line 3872  for (;;) Line 4222  for (;;)
4222          {          {
4223          for (i = min;; i++)          for (i = min;; i++)
4224            {            {
4225            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))            if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
4226              return TRUE;              return TRUE;
4227            if (i >= max || eptr >= md->end_subject ||            if (i >= max || eptr >= md->end_subject ||
4228                c == md->lcc[*eptr++])                c == md->lcc[*eptr++])
# Line 3889  for (;;) Line 4239  for (;;)
4239            eptr++;            eptr++;
4240            }            }
4241          while (eptr >= pp)          while (eptr >= pp)
4242            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))            if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
4243              return TRUE;              return TRUE;
4244          return FALSE;          return FALSE;
4245          }          }
# Line 3906  for (;;) Line 4256  for (;;)
4256          {          {
4257          for (i = min;; i++)          for (i = min;; i++)
4258            {            {
4259            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))            if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
4260              return TRUE;              return TRUE;
4261            if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;            if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;
4262            }            }
# Line 3921  for (;;) Line 4271  for (;;)
4271            eptr++;            eptr++;
4272            }            }
4273          while (eptr >= pp)          while (eptr >= pp)
4274           if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))           if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
4275             return TRUE;             return TRUE;
4276          return FALSE;          return FALSE;
4277          }          }
# Line 4021  for (;;) Line 4371  for (;;)
4371        {        {
4372        for (i = min;; i++)        for (i = min;; i++)
4373          {          {
4374          if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE;          if (match(eptr, ecode, offset_top, md, ims, eptrb, 0)) return TRUE;
4375          if (i >= max || eptr >= md->end_subject) return FALSE;          if (i >= max || eptr >= md->end_subject) return FALSE;
4376    
4377          c = *eptr++;          c = *eptr++;
# Line 4140  for (;;) Line 4490  for (;;)
4490          }          }
4491    
4492        while (eptr >= pp)        while (eptr >= pp)
4493          if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))          if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
4494            return TRUE;            return TRUE;
4495        return FALSE;        return FALSE;
4496        }        }
# Line 4216  if (re == NULL || subject == NULL || Line 4566  if (re == NULL || subject == NULL ||
4566     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
4567  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
4568    
4569    match_block.start_pattern = re->code;
4570  match_block.start_subject = (const uschar *)subject;  match_block.start_subject = (const uschar *)subject;
4571  match_block.end_subject = match_block.start_subject + length;  match_block.end_subject = match_block.start_subject + length;
4572  end_subject = match_block.end_subject;  end_subject = match_block.end_subject;
# Line 4425  do Line 4776  do
4776    if certain parts of the pattern were not used. */    if certain parts of the pattern were not used. */
4777    
4778    match_block.start_match = start_match;    match_block.start_match = start_match;
4779    if (!match(start_match, re->code, 2, &match_block, ims, FALSE, start_match))    if (!match(start_match, re->code, 2, &match_block, ims, NULL, match_isgroup))
4780      continue;      continue;
4781    
4782    /* Copy the offset information from temporary store if necessary */    /* Copy the offset information from temporary store if necessary */

Legend:
Removed from v.39  
changed lines
  Added in v.47

  ViewVC Help
Powered by ViewVC 1.1.5