/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 645 by ph10, Sun Jul 31 17:02:18 2011 UTC revision 699 by ph10, Tue Sep 20 10:46:54 2011 UTC
# Line 1082  for (;;) Line 1082  for (;;)
1082          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
1083          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last;
1084          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1085          cb.mark             = markptr;          cb.mark             = markptr;
1086          if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);          if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1087          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1088          }          }
# Line 1275  for (;;) Line 1275  for (;;)
1275        {        {
1276        if (op == OP_SCOND) md->match_function_type = MATCH_CBEGROUP;        if (op == OP_SCOND) md->match_function_type = MATCH_CBEGROUP;
1277        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
1278        if (rrc == MATCH_THEN && md->start_match_ptr == ecode)  
1279          rrc = MATCH_NOMATCH;        /* If the result is THEN from within the "true" branch of the condition,
1280          md->start_match_ptr will point to the original OP_COND, not to the start
1281          of the branch, so we have to do work to see if it matches. If THEN comes
1282          from the "false" branch, md->start_match_ptr does point to OP_ALT. */
1283    
1284          if (rrc == MATCH_THEN)
1285            {
1286            if (*ecode != OP_ALT)
1287              {
1288              do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1289              ecode -= GET(ecode, 1);
1290              }
1291            if (md->start_match_ptr == ecode) rrc = MATCH_NOMATCH;
1292            }
1293        RRETURN(rrc);        RRETURN(rrc);
1294        }        }
1295      else                         /* Condition false & no alternative */      else                         /* Condition false & no alternative */
# Line 1477  for (;;) Line 1490  for (;;)
1490        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1491        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1492        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1493        cb.mark             = markptr;        cb.mark             = markptr;
1494        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1495        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1496        }        }
# Line 1505  for (;;) Line 1518  for (;;)
1518        {        {
1519        recursion_info *ri;        recursion_info *ri;
1520        int recno;        int recno;
1521    
1522        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1523        recno = (callpat == md->start_code)? 0 :        recno = (callpat == md->start_code)? 0 :
1524          GET2(callpat, 1 + LINK_SIZE);          GET2(callpat, 1 + LINK_SIZE);
1525    
1526        /* Check for repeating a recursion without advancing the subject pointer.        /* Check for repeating a recursion without advancing the subject pointer.
1527        This should catch convoluted mutual recursions. (Some simple cases are        This should catch convoluted mutual recursions. (Some simple cases are
1528        caught at compile time.) */        caught at compile time.) */
1529    
1530        for (ri = md->recursive; ri != NULL; ri = ri->prevrec)        for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1531          if (recno == ri->group_num && eptr == ri->subject_position)          if (recno == ri->group_num && eptr == ri->subject_position)
1532            RRETURN(PCRE_ERROR_RECURSELOOP);            RRETURN(PCRE_ERROR_RECURSELOOP);
1533    
1534        /* Add to "recursing stack" */        /* Add to "recursing stack" */
# Line 1556  for (;;) Line 1569  for (;;)
1569            md, eptrb, RM6);            md, eptrb, RM6);
1570          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
1571              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1572            md->recursive = new_recursive.prevrec;
1573          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1574            {            {
1575            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
           md->recursive = new_recursive.prevrec;  
1576            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1577              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1578    
# Line 2014  for (;;) Line 2027  for (;;)
2027      /* Fall through */      /* Fall through */
2028    
2029      case OP_ALLANY:      case OP_ALLANY:
2030      if (eptr++ >= md->end_subject)      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2031        {        {                            /* not be updated before SCHECK_PARTIAL. */
2032        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2033        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
2034        }        }
2035        eptr++;
2036      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2037      ecode++;      ecode++;
2038      break;      break;
# Line 2027  for (;;) Line 2041  for (;;)
2041      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
2042    
2043      case OP_ANYBYTE:      case OP_ANYBYTE:
2044      if (eptr++ >= md->end_subject)      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2045        {        {                            /* not be updated before SCHECK_PARTIAL. */
2046        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2047        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
2048        }        }
2049        eptr++;
2050      ecode++;      ecode++;
2051      break;      break;
2052    
# Line 5180  for (;;) Line 5195  for (;;)
5195                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5196                }                }
5197              }              }
5198            else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */            else
5199                {
5200                eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
5201                SCHECK_PARTIAL();
5202                }
5203            break;            break;
5204    
5205            /* The byte case is the same as non-UTF8 */            /* The byte case is the same as non-UTF8 */
# Line 5755  pcre_exec(const pcre *argument_re, const Line 5774  pcre_exec(const pcre *argument_re, const
5774    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
5775    int offsetcount)    int offsetcount)
5776  {  {
5777  int rc, ocount;  int rc, ocount, arg_offset_max;
5778  int first_byte = -1;  int first_byte = -1;
5779  int req_byte = -1;  int req_byte = -1;
5780  int req_byte2 = -1;  int req_byte2 = -1;
# Line 5791  if (re == NULL || subject == NULL || Line 5810  if (re == NULL || subject == NULL ||
5810  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5811  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
5812    
5813  /* This information is for finding all the numbers associated with a given  /* These two settings are used in the code for checking a UTF-8 string that
5814  name, for condition testing. */  follows immediately afterwards. Other values in the md block are used only
5815    during "normal" pcre_exec() processing, not when the JIT support is in use,
5816    so they are set up later. */
5817    
5818    utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
5819    md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
5820                  ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
5821    
5822    /* Check a UTF-8 string if required. Pass back the character offset and error
5823    code for an invalid string if a results vector is available. */
5824    
5825    #ifdef SUPPORT_UTF8
5826    if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5827      {
5828      int erroroffset;
5829      int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);
5830      if (errorcode != 0)
5831        {
5832        if (offsetcount >= 2)
5833          {
5834          offsets[0] = erroroffset;
5835          offsets[1] = errorcode;
5836          }
5837        return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
5838          PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
5839        }
5840    
5841      /* Check that a start_offset points to the start of a UTF-8 character. */
5842      if (start_offset > 0 && start_offset < length &&
5843          (((USPTR)subject)[start_offset] & 0xc0) == 0x80)
5844        return PCRE_ERROR_BADUTF8_OFFSET;
5845      }
5846    #endif
5847    
5848    /* If the pattern was successfully studied with JIT support, run the JIT
5849    executable instead of the rest of this function. Most options must be set at
5850    compile time for the JIT code to be usable. Fall back to the normal code path if
5851    an unsupported flag is set. In particular, JIT does not support partial
5852    matching. */
5853    
5854    #ifdef SUPPORT_JIT
5855    if (extra_data != NULL
5856        && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
5857        && extra_data->executable_jit != NULL
5858        && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
5859                        PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)
5860      return _pcre_jit_exec(re, extra_data->executable_jit, subject, length,
5861        start_offset, options, ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
5862        ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
5863    #endif
5864    
5865    /* Carry on with non-JIT matching. This information is for finding all the
5866    numbers associated with a given name, for condition testing. */
5867    
5868  md->name_table = (uschar *)re + re->name_table_offset;  md->name_table = (uschar *)re + re->name_table_offset;
5869  md->name_count = re->name_count;  md->name_count = re->name_count;
# Line 5859  md->end_subject = md->start_subject + le Line 5930  md->end_subject = md->start_subject + le
5930  end_subject = md->end_subject;  end_subject = md->end_subject;
5931    
5932  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  
5933  md->use_ucp = (re->options & PCRE_UCP) != 0;  md->use_ucp = (re->options & PCRE_UCP) != 0;
5934  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5935    
# Line 5870  md->notbol = (options & PCRE_NOTBOL) != Line 5940  md->notbol = (options & PCRE_NOTBOL) !=
5940  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
5941  md->notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
5942  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :  
               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;  
   
5943    
5944  md->hitend = FALSE;  md->hitend = FALSE;
5945  md->mark = NULL;                        /* In case never set */  md->mark = NULL;                        /* In case never set */
# Line 5955  defined (though never set). So there's n Line 6022  defined (though never set). So there's n
6022  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
6023    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
6024    
 /* Check a UTF-8 string if required. Pass back the character offset and error  
 code for an invalid string if a results vector is available. */  
   
 #ifdef SUPPORT_UTF8  
 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  
   {  
   int erroroffset;  
   int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);  
   if (errorcode != 0)  
     {  
     if (offsetcount >= 2)  
       {  
       offsets[0] = erroroffset;  
       offsets[1] = errorcode;  
       }  
     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?  
       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;  
     }  
   
   /* Check that a start_offset points to the start of a UTF-8 character. */  
   
   if (start_offset > 0 && start_offset < length &&  
       (((USPTR)subject)[start_offset] & 0xc0) == 0x80)  
     return PCRE_ERROR_BADUTF8_OFFSET;  
   }  
 #endif  
   
6025  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can
6026  hold, we get a temporary chunk of working store to use during the matching.  hold, we get a temporary chunk of working store to use during the matching.
6027  Otherwise, we can use the vector supplied, rounding down its size to a multiple  Otherwise, we can use the vector supplied, rounding down its size to a multiple
6028  of 3. */  of 3. */
6029    
6030  ocount = offsetcount - (offsetcount % 3);  ocount = offsetcount - (offsetcount % 3);
6031    arg_offset_max = (2*ocount)/3;
6032    
6033  if (re->top_backref > 0 && re->top_backref >= ocount/3)  if (re->top_backref > 0 && re->top_backref >= ocount/3)
6034    {    {
# Line 6362  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6403  if (rc == MATCH_MATCH || rc == MATCH_ACC
6403    {    {
6404    if (using_temporary_offsets)    if (using_temporary_offsets)
6405      {      {
6406      if (offsetcount >= 4)      if (arg_offset_max >= 4)
6407        {        {
6408        memcpy(offsets + 2, md->offset_vector + 2,        memcpy(offsets + 2, md->offset_vector + 2,
6409          (offsetcount - 2) * sizeof(int));          (arg_offset_max - 2) * sizeof(int));
6410        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
6411        }        }
6412      if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;      if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
6413      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
6414      (pcre_free)(md->offset_vector);      (pcre_free)(md->offset_vector);
6415      }      }
6416    
6417    /* Set the return code to the number of captured strings, or 0 if there are    /* Set the return code to the number of captured strings, or 0 if there were
6418    too many to fit into the vector. */    too many to fit into the vector. */
6419    
6420    rc = md->offset_overflow? 0 : md->end_offset_top/2;    rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
6421        0 : md->end_offset_top/2;
6422    
6423    /* If there is space in the offset vector, set any unused pairs at the end of    /* If there is space in the offset vector, set any unused pairs at the end of
6424    the pattern to -1 for backwards compatibility. It is documented that this    the pattern to -1 for backwards compatibility. It is documented that this
6425    happens. In earlier versions, the whole set of potential capturing offsets    happens. In earlier versions, the whole set of potential capturing offsets
6426    was set to -1 each time round the loop, but this is handled differently now.    was set to -1 each time round the loop, but this is handled differently now.
6427    "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only    "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
6428    those at the end that need unsetting here. We can't just unset them all at    those at the end that need unsetting here. We can't just unset them all at
6429    the start of the whole thing because they may get set in one branch that is    the start of the whole thing because they may get set in one branch that is
6430    not the final matching branch. */    not the final matching branch. */

Legend:
Removed from v.645  
changed lines
  Added in v.699

  ViewVC Help
Powered by ViewVC 1.1.5