/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 614 by ph10, Sat Jul 9 10:48:16 2011 UTC | revision 615 by ph10, Mon Jul 11 14:23:06 2011 UTC
# Line 847  for (;;) Line 847  for (;;)
847          if (rrc != MATCH_NOMATCH &&          if (rrc != MATCH_NOMATCH &&
848              (rrc != MATCH_THEN || md->start_match_ptr != ecode))              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
849            RRETURN(rrc);            RRETURN(rrc);
   
         /* If md->end_offset_top is greater than offset_top, it means that the  
         branch we have just failed to match did manage to match some capturing  
         parentheses within an atomic group or an assertion. Although offset_top  
         reverts to its original value at this level, we must unset the captured  
         values in case a later match sets a higher capturing number. Example:  
         matching /((?>(a))b|(a)c)/ against "ac". This captures 3, but we need  
         to ensure that 2 - which was captured in the atomic matching - is  
         unset. */  
   
         if (md->end_offset_top > offset_top)  
           {  
           register int *iptr = md->offset_vector + offset_top;  
           register int *iend = md->offset_vector + md->end_offset_top;  
           while (iptr < iend) *iptr++ = -1;  
           }  
   
850          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
851          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
852          if (*ecode != OP_ALT) break;          if (*ecode != OP_ALT) break;
# Line 909  for (;;) Line 892  for (;;)
892        if (rrc != MATCH_NOMATCH &&        if (rrc != MATCH_NOMATCH &&
893            (rrc != MATCH_THEN || md->start_match_ptr != ecode))            (rrc != MATCH_THEN || md->start_match_ptr != ecode))
894          RRETURN(rrc);          RRETURN(rrc);
   
       /* See explanatory comment above under OP_CBRA. */  
   
       if (md->end_offset_top > offset_top)  
         {  
         register int *iptr = md->offset_vector + offset_top;  
         register int *iend = md->offset_vector + md->end_offset_top;  
         while (iptr < iend) *iptr++ = -1;  
         }  
   
895        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
896        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
897        }        }
# Line 989  for (;;) Line 962  for (;;)
962          if (rrc != MATCH_NOMATCH &&          if (rrc != MATCH_NOMATCH &&
963              (rrc != MATCH_THEN || md->start_match_ptr != ecode))              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
964            RRETURN(rrc);            RRETURN(rrc);
   
         /* See explanatory comment above under OP_CBRA. */  
   
         if (md->end_offset_top > offset_top)  
           {  
           register int *iptr = md->offset_vector + offset_top;  
           register int *iend = md->offset_vector + md->end_offset_top;  
           while (iptr < iend) *iptr++ = -1;  
           }  
   
965          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
966          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
967          if (*ecode != OP_ALT) break;          if (*ecode != OP_ALT) break;
# Line 1061  for (;;) Line 1024  for (;;)
1024        if (rrc != MATCH_NOMATCH &&        if (rrc != MATCH_NOMATCH &&
1025            (rrc != MATCH_THEN || md->start_match_ptr != ecode))            (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1026          RRETURN(rrc);          RRETURN(rrc);
   
       /* See explanatory comment above under OP_CBRA. */  
   
       if (md->end_offset_top > offset_top)  
         {  
         register int *iptr = md->offset_vector + offset_top;  
         register int *iend = md->offset_vector + md->end_offset_top;  
         while (iptr < iend) *iptr++ = -1;  
         }  
   
1027        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1028        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1029        }        }
# Line 1413  for (;;) Line 1366  for (;;)
1366        if (rrc != MATCH_NOMATCH &&        if (rrc != MATCH_NOMATCH &&
1367            (rrc != MATCH_THEN || md->start_match_ptr != ecode))            (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1368          RRETURN(rrc);          RRETURN(rrc);
   
       /* See explanatory comment above under OP_CBRA. */  
   
       if (md->end_offset_top > offset_top)  
         {  
         register int *iptr = md->offset_vector + offset_top;  
         register int *iend = md->offset_vector + md->end_offset_top;  
         while (iptr < iend) *iptr++ = -1;  
         }  
   
1369        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1370        }        }
1371      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1650  for (;;) Line 1593  for (;;)
1593        if (rrc != MATCH_NOMATCH &&        if (rrc != MATCH_NOMATCH &&
1594            (rrc != MATCH_THEN || md->start_match_ptr != ecode))            (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1595          RRETURN(rrc);          RRETURN(rrc);
   
       /* See explanatory comment above under OP_CBRA. */  
   
       if (md->end_offset_top > offset_top)  
         {  
         register int *iptr = md->offset_vector + offset_top;  
         register int *iend = md->offset_vector + md->end_offset_top;  
         while (iptr < iend) *iptr++ = -1;  
         }  
   
1596        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1597        }        }
1598      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1807  for (;;) Line 1740  for (;;)
1740        md->capture_last = number;        md->capture_last = number;
1741        if (offset >= md->offset_max) md->offset_overflow = TRUE; else        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1742          {          {
1743            /* If offset is greater than offset_top, it means that we are
1744            "skipping" a capturing group, and that group's offsets must be marked
1745            unset. In earlier versions of PCRE, all the offsets were unset at the
1746            start of matching, but this doesn't work because atomic groups and
1747            assertions can cause a value to be set that should later be unset.
1748            Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1749            part of the atomic group, but this is not on the final matching path,
1750            so must be unset when 2 is set. (If there is no group 2, there is no
1751            problem, because offset_top will then be 2, indicating no capture.) */
1752    
1753            if (offset > offset_top)
1754              {
1755              register int *iptr = md->offset_vector + offset_top;
1756              register int *iend = md->offset_vector + offset;
1757              while (iptr < iend) *iptr++ = -1;
1758              }
1759    
1760            /* Now make the extraction */
1761    
1762          md->offset_vector[offset] =          md->offset_vector[offset] =
1763            md->offset_vector[md->offset_end - number];            md->offset_vector[md->offset_end - number];
1764          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
# Line 5859  pcre_exec(const pcre *argument_re, const Line 5811  pcre_exec(const pcre *argument_re, const
5811    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
5812    int offsetcount)    int offsetcount)
5813  {  {
5814  int rc, resetcount, ocount;  int rc, ocount;
5815  int first_byte = -1;  int first_byte = -1;
5816  int req_byte = -1;  int req_byte = -1;
5817  int req_byte2 = -1;  int req_byte2 = -1;
# Line 6108  md->offset_max = (2*ocount)/3; Line 6060  md->offset_max = (2*ocount)/3;
6060  md->offset_overflow = FALSE;  md->offset_overflow = FALSE;
6061  md->capture_last = -1;  md->capture_last = -1;
6062    
 /* Compute the minimum number of offsets that we need to reset each time. Doing  
 this makes a huge difference to execution time when there aren't many brackets  
 in the pattern. */  
   
 resetcount = 2 + re->top_bracket * 2;  
 if (resetcount > offsetcount) resetcount = ocount;  
   
6063  /* Reset the working variable associated with each extraction. These should  /* Reset the working variable associated with each extraction. These should
6064  never be used unless previously set, but they get saved and restored, and so we  never be used unless previously set, but they get saved and restored, and so we
6065  initialize them to avoid reading uninitialized locations. */  initialize them to avoid reading uninitialized locations. Also, unset the
6066    offsets for the matched string. This is really just for tidiness with callouts,
6067    in case they inspect these fields. */
6068    
6069  if (md->offset_vector != NULL)  if (md->offset_vector != NULL)
6070    {    {
6071    register int *iptr = md->offset_vector + ocount;    register int *iptr = md->offset_vector + ocount;
6072    register int *iend = iptr - resetcount/2 + 1;    register int *iend = iptr - re->top_bracket;
6073      if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
6074    while (--iptr >= iend) *iptr = -1;    while (--iptr >= iend) *iptr = -1;
6075      md->offset_vector[0] = md->offset_vector[1] = -1;
6076    }    }
6077    
6078  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_byte value is
# Line 6157  if ((re->flags & PCRE_REQCHSET) != 0) Line 6106  if ((re->flags & PCRE_REQCHSET) != 0)
6106    }    }
6107    
6108    
6109    
6110    
6111  /* ==========================================================================*/  /* ==========================================================================*/
6112    
6113  /* Loop for handling unanchored repeated matching attempts; for anchored regexs  /* Loop for handling unanchored repeated matching attempts; for anchored regexs
# Line 6167  for(;;) Line 6118  for(;;)
6118    USPTR save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
6119    USPTR new_start_match;    USPTR new_start_match;
6120    
   /* Reset the maximum number of extractions we might see. */  
   
   if (md->offset_vector != NULL)  
     {  
     register int *iptr = md->offset_vector;  
     register int *iend = iptr + resetcount;  
     while (iptr < iend) *iptr++ = -1;  
     }  
   
6121    /* If firstline is TRUE, the start of the match is constrained to the first    /* If firstline is TRUE, the start of the match is constrained to the first
6122    line of a multiline string. That is, the match must be before or at the first    line of a multiline string. That is, the match must be before or at the first
6123    newline. Implement this by temporarily adjusting end_subject so that we stop    newline. Implement this by temporarily adjusting end_subject so that we stop

Legend:
Removed from v.614
Changed lines
Added in v.615

  ViewVC Help
Powered by ViewVC 1.1.5