/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

Left column: revision 613 by ph10, Sat Jul 2 16:59:52 2011 UTC | Right column: revision 615 by ph10, Mon Jul 11 14:23:06 2011 UTC
# Line 1601  for (;;) Line 1601  for (;;)
1601    
1602      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1603    
1604      /* Continue as from after the assertion, updating the offsets high water      /* Continue after the group, updating the offsets high water mark, since
1605      mark, since extracts may have been taken. */      extracts may have been taken. */
1606    
1607      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1608    
# Line 1740  for (;;) Line 1740  for (;;)
1740        md->capture_last = number;        md->capture_last = number;
1741        if (offset >= md->offset_max) md->offset_overflow = TRUE; else        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1742          {          {
1743            /* If offset is greater than offset_top, it means that we are
1744            "skipping" a capturing group, and that group's offsets must be marked
1745            unset. In earlier versions of PCRE, all the offsets were unset at the
1746            start of matching, but this doesn't work because atomic groups and
1747            assertions can cause a value to be set that should later be unset.
1748            Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1749            part of the atomic group, but this is not on the final matching path,
1750            so must be unset when 2 is set. (If there is no group 2, there is no
1751            problem, because offset_top will then be 2, indicating no capture.) */
1752    
1753            if (offset > offset_top)
1754              {
1755              register int *iptr = md->offset_vector + offset_top;
1756              register int *iend = md->offset_vector + offset;
1757              while (iptr < iend) *iptr++ = -1;
1758              }
1759    
1760            /* Now make the extraction */
1761    
1762          md->offset_vector[offset] =          md->offset_vector[offset] =
1763            md->offset_vector[md->offset_end - number];            md->offset_vector[md->offset_end - number];
1764          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
# Line 5792  pcre_exec(const pcre *argument_re, const Line 5811  pcre_exec(const pcre *argument_re, const
5811    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
5812    int offsetcount)    int offsetcount)
5813  {  {
5814  int rc, resetcount, ocount;  int rc, ocount;
5815  int first_byte = -1;  int first_byte = -1;
5816  int req_byte = -1;  int req_byte = -1;
5817  int req_byte2 = -1;  int req_byte2 = -1;
# Line 6041  md->offset_max = (2*ocount)/3; Line 6060  md->offset_max = (2*ocount)/3;
6060  md->offset_overflow = FALSE;  md->offset_overflow = FALSE;
6061  md->capture_last = -1;  md->capture_last = -1;
6062    
 /* Compute the minimum number of offsets that we need to reset each time. Doing  
 this makes a huge difference to execution time when there aren't many brackets  
 in the pattern. */  
   
 resetcount = 2 + re->top_bracket * 2;  
 if (resetcount > offsetcount) resetcount = ocount;  
   
6063  /* Reset the working variable associated with each extraction. These should  /* Reset the working variable associated with each extraction. These should
6064  never be used unless previously set, but they get saved and restored, and so we  never be used unless previously set, but they get saved and restored, and so we
6065  initialize them to avoid reading uninitialized locations. */  initialize them to avoid reading uninitialized locations. Also, unset the
6066    offsets for the matched string. This is really just for tidiness with callouts,
6067    in case they inspect these fields. */
6068    
6069  if (md->offset_vector != NULL)  if (md->offset_vector != NULL)
6070    {    {
6071    register int *iptr = md->offset_vector + ocount;    register int *iptr = md->offset_vector + ocount;
6072    register int *iend = iptr - resetcount/2 + 1;    register int *iend = iptr - re->top_bracket;
6073      if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
6074    while (--iptr >= iend) *iptr = -1;    while (--iptr >= iend) *iptr = -1;
6075      md->offset_vector[0] = md->offset_vector[1] = -1;
6076    }    }
6077    
6078  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_byte value is
# Line 6090  if ((re->flags & PCRE_REQCHSET) != 0) Line 6106  if ((re->flags & PCRE_REQCHSET) != 0)
6106    }    }
6107    
6108    
6109    
6110    
6111  /* ==========================================================================*/  /* ==========================================================================*/
6112    
6113  /* Loop for handling unanchored repeated matching attempts; for anchored regexs  /* Loop for handling unanchored repeated matching attempts; for anchored regexs
# Line 6100  for(;;) Line 6118  for(;;)
6118    USPTR save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
6119    USPTR new_start_match;    USPTR new_start_match;
6120    
   /* Reset the maximum number of extractions we might see. */  
   
   if (md->offset_vector != NULL)  
     {  
     register int *iptr = md->offset_vector;  
     register int *iend = iptr + resetcount;  
     while (iptr < iend) *iptr++ = -1;  
     }  
   
6121    /* If firstline is TRUE, the start of the match is constrained to the first    /* If firstline is TRUE, the start of the match is constrained to the first
6122    line of a multiline string. That is, the match must be before or at the first    line of a multiline string. That is, the match must be before or at the first
6123    newline. Implement this by temporarily adjusting end_subject so that we stop    newline. Implement this by temporarily adjusting end_subject so that we stop
# Line 6298  for(;;) Line 6307  for(;;)
6307    md->start_used_ptr = start_match;    md->start_used_ptr = start_match;
6308    md->match_call_count = 0;    md->match_call_count = 0;
6309    md->match_function_type = 0;    md->match_function_type = 0;
6310      md->end_offset_top = 0;
6311    rc = match(start_match, md->start_code, start_match, NULL, 2, md, NULL, 0);    rc = match(start_match, md->start_code, start_match, NULL, 2, md, NULL, 0);
6312    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
6313    

Legend:
Removed from v.613  
changed lines
  Added in v.615

  ViewVC Help
Powered by ViewVC 1.1.5