/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 610 by ph10, Tue Jun 28 15:58:34 2011 UTC | revision 617 by ph10, Tue Jul 12 11:00:10 2011 UTC
# Line 808  for (;;) Line 808  for (;;)
808      subject position in the working slot at the top of the vector. We mustn't      subject position in the working slot at the top of the vector. We mustn't
809      change the current values of the data slot, because they may be set from a      change the current values of the data slot, because they may be set from a
810      previous iteration of this group, and be referred to by a reference inside      previous iteration of this group, and be referred to by a reference inside
811      the group. If we fail to match, we need to restore this value and also the      the group. A failure to match might occur after the group has succeeded,
812      values of the final offsets, in case they were set by a previous iteration      if something later on doesn't match. For this reason, we need to restore
813      of the same bracket.      the working value and also the values of the final offsets, in case they
814        were set by a previous iteration of the same bracket.
815    
816      If there isn't enough space in the offset vector, treat this as if it were      If there isn't enough space in the offset vector, treat this as if it were
817      a non-capturing bracket. Don't worry about setting the flag for the error      a non-capturing bracket. Don't worry about setting the flag for the error
# Line 1296  for (;;) Line 1297  for (;;)
1297      recursion, continue from after the call. */      recursion, continue from after the call. */
1298    
1299      case OP_ACCEPT:      case OP_ACCEPT:
1300        case OP_ASSERT_ACCEPT:
1301      case OP_END:      case OP_END:
1302      if (md->recursive != NULL)      if (md->recursive != NULL)
1303        {        {
# Line 1311  for (;;) Line 1313  for (;;)
1313          }          }
1314        }        }
1315    
1316      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is      /* Otherwise, if we have matched an empty string, fail if not in an
1317      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of      assertion and if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1318      the subject. In both cases, backtracking will then try other alternatives,      is set and we have matched at the start of the subject. In both cases,
1319      if any. */      backtracking will then try other alternatives, if any. */
1320    
1321      else if (eptr == mstart &&      else if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1322          (md->notempty ||          (md->notempty ||
1323            (md->notempty_atstart &&            (md->notempty_atstart &&
1324              mstart == md->start_subject + md->start_offset)))              mstart == md->start_subject + md->start_offset)))
# Line 1487  for (;;) Line 1489  for (;;)
1489      65535 such values, which is too large to put on the stack, but using malloc      65535 such values, which is too large to put on the stack, but using malloc
1490      for small numbers seems expensive. As a compromise, the stack is used when      for small numbers seems expensive. As a compromise, the stack is used when
1491      there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc      there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1492      is used. A problem is what to do if the malloc fails ... there is no way of      is used.
     returning to the top level with an error. Save the top REC_STACK_SAVE_MAX  
     values on the stack, and accept that the rest may be wrong.  
1493    
1494      There are also other values that have to be saved. We use a chained      There are also other values that have to be saved. We use a chained
1495      sequence of blocks that actually live on the stack. Thanks to Robin Houston      sequence of blocks that actually live on the stack. Thanks to Robin Houston
# Line 1600  for (;;) Line 1600  for (;;)
1600    
1601      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1602    
1603      /* Continue as from after the assertion, updating the offsets high water      /* Continue after the group, updating the offsets high water mark, since
1604      mark, since extracts may have been taken. */      extracts may have been taken. */
1605    
1606      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1607    
# Line 1611  for (;;) Line 1611  for (;;)
1611      /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1612      happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1613      This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1614      5.005. If there is an options reset, it will get obeyed in the normal      5.005. */
     course of events. */  
1615    
1616      if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1617        {        {
# Line 1629  for (;;) Line 1628  for (;;)
1628        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM8);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM8);
1629        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1630        ecode = prev;        ecode = prev;
       goto TAIL_RECURSE;  
1631        }        }
1632      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1633        {        {
# Line 1637  for (;;) Line 1635  for (;;)
1635        RMATCH(eptr, prev, offset_top, md, eptrb, RM9);        RMATCH(eptr, prev, offset_top, md, eptrb, RM9);
1636        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1637        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
       goto TAIL_RECURSE;  
1638        }        }
1639        goto TAIL_RECURSE;
1640    
1641      /* Control never gets here */      /* Control never gets here */
1642    
1643      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
# Line 1739  for (;;) Line 1738  for (;;)
1738        md->capture_last = number;        md->capture_last = number;
1739        if (offset >= md->offset_max) md->offset_overflow = TRUE; else        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1740          {          {
1741            /* If offset is greater than offset_top, it means that we are
1742            "skipping" a capturing group, and that group's offsets must be marked
1743            unset. In earlier versions of PCRE, all the offsets were unset at the
1744            start of matching, but this doesn't work because atomic groups and
1745            assertions can cause a value to be set that should later be unset.
1746            Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1747            part of the atomic group, but this is not on the final matching path,
1748            so must be unset when 2 is set. (If there is no group 2, there is no
1749            problem, because offset_top will then be 2, indicating no capture.) */
1750    
1751            if (offset > offset_top)
1752              {
1753              register int *iptr = md->offset_vector + offset_top;
1754              register int *iend = md->offset_vector + offset;
1755              while (iptr < iend) *iptr++ = -1;
1756              }
1757    
1758            /* Now make the extraction */
1759    
1760          md->offset_vector[offset] =          md->offset_vector[offset] =
1761            md->offset_vector[md->offset_end - number];            md->offset_vector[md->offset_end - number];
1762          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
# Line 5791  pcre_exec(const pcre *argument_re, const Line 5809  pcre_exec(const pcre *argument_re, const
5809    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
5810    int offsetcount)    int offsetcount)
5811  {  {
5812  int rc, resetcount, ocount;  int rc, ocount;
5813  int first_byte = -1;  int first_byte = -1;
5814  int req_byte = -1;  int req_byte = -1;
5815  int req_byte2 = -1;  int req_byte2 = -1;
# Line 5899  utf8 = md->utf8 = (re->options & PCRE_UT Line 5917  utf8 = md->utf8 = (re->options & PCRE_UT
5917  md->use_ucp = (re->options & PCRE_UCP) != 0;  md->use_ucp = (re->options & PCRE_UCP) != 0;
5918  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5919    
5920    /* Some options are unpacked into BOOL variables in the hope that testing
5921    them will be faster than individual option bits. */
5922    
5923  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
5924  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
5925  md->notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
5926  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
5927  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
5928                ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;                ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
5929    
5930    
5931  md->hitend = FALSE;  md->hitend = FALSE;
5932  md->mark = NULL;                        /* In case never set */  md->mark = NULL;                        /* In case never set */
5933    
# Line 6035  md->offset_max = (2*ocount)/3; Line 6058  md->offset_max = (2*ocount)/3;
6058  md->offset_overflow = FALSE;  md->offset_overflow = FALSE;
6059  md->capture_last = -1;  md->capture_last = -1;
6060    
 /* Compute the minimum number of offsets that we need to reset each time. Doing  
 this makes a huge difference to execution time when there aren't many brackets  
 in the pattern. */  
   
 resetcount = 2 + re->top_bracket * 2;  
 if (resetcount > offsetcount) resetcount = ocount;  
   
6061  /* Reset the working variable associated with each extraction. These should  /* Reset the working variable associated with each extraction. These should
6062  never be used unless previously set, but they get saved and restored, and so we  never be used unless previously set, but they get saved and restored, and so we
6063  initialize them to avoid reading uninitialized locations. */  initialize them to avoid reading uninitialized locations. Also, unset the
6064    offsets for the matched string. This is really just for tidiness with callouts,
6065    in case they inspect these fields. */
6066    
6067  if (md->offset_vector != NULL)  if (md->offset_vector != NULL)
6068    {    {
6069    register int *iptr = md->offset_vector + ocount;    register int *iptr = md->offset_vector + ocount;
6070    register int *iend = iptr - resetcount/2 + 1;    register int *iend = iptr - re->top_bracket;
6071      if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
6072    while (--iptr >= iend) *iptr = -1;    while (--iptr >= iend) *iptr = -1;
6073      md->offset_vector[0] = md->offset_vector[1] = -1;
6074    }    }
6075    
6076  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_byte value is
# Line 6084  if ((re->flags & PCRE_REQCHSET) != 0) Line 6104  if ((re->flags & PCRE_REQCHSET) != 0)
6104    }    }
6105    
6106    
6107    
6108    
6109  /* ==========================================================================*/  /* ==========================================================================*/
6110    
6111  /* Loop for handling unanchored repeated matching attempts; for anchored regexs  /* Loop for handling unanchored repeated matching attempts; for anchored regexs
# Line 6094  for(;;) Line 6116  for(;;)
6116    USPTR save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
6117    USPTR new_start_match;    USPTR new_start_match;
6118    
   /* Reset the maximum number of extractions we might see. */  
   
   if (md->offset_vector != NULL)  
     {  
     register int *iptr = md->offset_vector;  
     register int *iend = iptr + resetcount;  
     while (iptr < iend) *iptr++ = -1;  
     }  
   
6119    /* If firstline is TRUE, the start of the match is constrained to the first    /* If firstline is TRUE, the start of the match is constrained to the first
6120    line of a multiline string. That is, the match must be before or at the first    line of a multiline string. That is, the match must be before or at the first
6121    newline. Implement this by temporarily adjusting end_subject so that we stop    newline. Implement this by temporarily adjusting end_subject so that we stop
# Line 6292  for(;;) Line 6305  for(;;)
6305    md->start_used_ptr = start_match;    md->start_used_ptr = start_match;
6306    md->match_call_count = 0;    md->match_call_count = 0;
6307    md->match_function_type = 0;    md->match_function_type = 0;
6308      md->end_offset_top = 0;
6309    rc = match(start_match, md->start_code, start_match, NULL, 2, md, NULL, 0);    rc = match(start_match, md->start_code, start_match, NULL, 2, md, NULL, 0);
6310    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
6311    

Legend:
- Removed from v.610
- Changed lines
- Added in v.617

  ViewVC Help
Powered by ViewVC 1.1.5