/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 626 by ph10, Wed Jul 20 17:51:54 2011 UTC | revision 667 by ph10, Mon Aug 22 14:57:32 2011 UTC
# Line 1070  for (;;) Line 1070  for (;;)
1070        if (pcre_callout != NULL)        if (pcre_callout != NULL)
1071          {          {
1072          pcre_callout_block cb;          pcre_callout_block cb;
1073          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 2;   /* Version 1 of the callout block */
1074          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
1075          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
1076          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
# Line 1082  for (;;) Line 1082  for (;;)
1082          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
1083          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last;
1084          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1085            cb.mark             = markptr;
1086          if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);          if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1087          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1088          }          }
# Line 1365  for (;;) Line 1366  for (;;)
1366        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1367          {          {
1368          mstart = md->start_match_ptr;   /* In case \K reset it */          mstart = md->start_match_ptr;   /* In case \K reset it */
1369            markptr = md->mark;
1370          break;          break;
1371          }          }
1372        if (rrc != MATCH_NOMATCH &&        if (rrc != MATCH_NOMATCH &&
# Line 1463  for (;;) Line 1465  for (;;)
1465      if (pcre_callout != NULL)      if (pcre_callout != NULL)
1466        {        {
1467        pcre_callout_block cb;        pcre_callout_block cb;
1468        cb.version          = 1;   /* Version 1 of the callout block */        cb.version          = 2;   /* Version 1 of the callout block */
1469        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1470        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1471        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
# Line 1475  for (;;) Line 1477  for (;;)
1477        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1478        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1479        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1480          cb.mark             = markptr;
1481        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1482        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1483        }        }
# Line 1500  for (;;) Line 1503  for (;;)
1503    
1504      case OP_RECURSE:      case OP_RECURSE:
1505        {        {
1506          recursion_info *ri;
1507          int recno;
1508    
1509        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1510        new_recursive.group_num = (callpat == md->start_code)? 0 :        recno = (callpat == md->start_code)? 0 :
1511          GET2(callpat, 1 + LINK_SIZE);          GET2(callpat, 1 + LINK_SIZE);
1512    
1513          /* Check for repeating a recursion without advancing the subject pointer.
1514          This should catch convoluted mutual recursions. (Some simple cases are
1515          caught at compile time.) */
1516    
1517          for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1518            if (recno == ri->group_num && eptr == ri->subject_position)
1519              RRETURN(PCRE_ERROR_RECURSELOOP);
1520    
1521        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1522    
1523          new_recursive.group_num = recno;
1524          new_recursive.subject_position = eptr;
1525        new_recursive.prevrec = md->recursive;        new_recursive.prevrec = md->recursive;
1526        md->recursive = &new_recursive;        md->recursive = &new_recursive;
1527    
# Line 1653  for (;;) Line 1669  for (;;)
1669        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE */
1670        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1671        md->start_match_ptr = mstart;        md->start_match_ptr = mstart;
1672        MRRETURN(MATCH_MATCH);        MRRETURN(MATCH_MATCH);         /* Sets md->mark */
1673        }        }
1674    
1675      /* For capturing groups we have to check the group number back at the start      /* For capturing groups we have to check the group number back at the start
# Line 1998  for (;;) Line 2014  for (;;)
2014      /* Fall through */      /* Fall through */
2015    
2016      case OP_ALLANY:      case OP_ALLANY:
2017      if (eptr++ >= md->end_subject)      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2018        {        {                            /* not be updated before SCHECK_PARTIAL. */
2019        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2020        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
2021        }        }
2022        eptr++;
2023      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2024      ecode++;      ecode++;
2025      break;      break;
# Line 2011  for (;;) Line 2028  for (;;)
2028      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
2029    
2030      case OP_ANYBYTE:      case OP_ANYBYTE:
2031      if (eptr++ >= md->end_subject)      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2032        {        {                            /* not be updated before SCHECK_PARTIAL. */
2033        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2034        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
2035        }        }
2036        eptr++;
2037      ecode++;      ecode++;
2038      break;      break;
2039    
# Line 5164  for (;;) Line 5182  for (;;)
5182                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5183                }                }
5184              }              }
5185            else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */            else
5186                {
5187                eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
5188                SCHECK_PARTIAL();
5189                }
5190            break;            break;
5191    
5192            /* The byte case is the same as non-UTF8 */            /* The byte case is the same as non-UTF8 */
# Line 5739  pcre_exec(const pcre *argument_re, const Line 5761  pcre_exec(const pcre *argument_re, const
5761    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
5762    int offsetcount)    int offsetcount)
5763  {  {
5764  int rc, ocount;  int rc, ocount, arg_offset_max;
5765  int first_byte = -1;  int first_byte = -1;
5766  int req_byte = -1;  int req_byte = -1;
5767  int req_byte2 = -1;  int req_byte2 = -1;
# Line 5775  if (re == NULL || subject == NULL || Line 5797  if (re == NULL || subject == NULL ||
5797  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5798  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
5799    
5800  /* This information is for finding all the numbers associated with a given  /* These two settings are used in the code for checking a UTF-8 string that
5801  name, for condition testing. */  follows immediately afterwards. Other values in the md block are used only
5802    during "normal" pcre_exec() processing, not when the JIT support is in use,
5803    so they are set up later. */
5804    
5805    utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
5806    md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
5807                  ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
5808    
5809    /* Check a UTF-8 string if required. Pass back the character offset and error
5810    code for an invalid string if a results vector is available. */
5811    
5812    #ifdef SUPPORT_UTF8
5813    if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5814      {
5815      int erroroffset;
5816      int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);
5817      if (errorcode != 0)
5818        {
5819        if (offsetcount >= 2)
5820          {
5821          offsets[0] = erroroffset;
5822          offsets[1] = errorcode;
5823          }
5824        return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
5825          PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
5826        }
5827    
5828      /* Check that a start_offset points to the start of a UTF-8 character. */
5829      if (start_offset > 0 && start_offset < length &&
5830          (((USPTR)subject)[start_offset] & 0xc0) == 0x80)
5831        return PCRE_ERROR_BADUTF8_OFFSET;
5832      }
5833    #endif
5834    
5835    /* If the pattern was successfully studied with JIT support, run the JIT
5836    executable instead of the rest of this function. Most options must be set at
5837    compile time for the JIT code to be usable. Fallback to the normal code path if
5838    an unsupported flag is set. In particular, JIT does not support partial
5839    matching. */
5840    
5841    #ifdef SUPPORT_JIT
5842    if (extra_data != NULL
5843        && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
5844        && extra_data->executable_jit != NULL
5845        && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
5846                        PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)
5847      return _pcre_jit_exec(re, extra_data->executable_jit, subject, length,
5848        start_offset, options, offsets, offsetcount);
5849    #endif
5850    
5851    /* Carry on with non-JIT matching. This information is for finding all the
5852    numbers associated with a given name, for condition testing. */
5853    
5854  md->name_table = (uschar *)re + re->name_table_offset;  md->name_table = (uschar *)re + re->name_table_offset;
5855  md->name_count = re->name_count;  md->name_count = re->name_count;
# Line 5843  md->end_subject = md->start_subject + le Line 5916  md->end_subject = md->start_subject + le
5916  end_subject = md->end_subject;  end_subject = md->end_subject;
5917    
5918  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  
5919  md->use_ucp = (re->options & PCRE_UCP) != 0;  md->use_ucp = (re->options & PCRE_UCP) != 0;
5920  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5921    
# Line 5854  md->notbol = (options & PCRE_NOTBOL) != Line 5926  md->notbol = (options & PCRE_NOTBOL) !=
5926  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
5927  md->notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
5928  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :  
               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;  
   
5929    
5930  md->hitend = FALSE;  md->hitend = FALSE;
5931  md->mark = NULL;                        /* In case never set */  md->mark = NULL;                        /* In case never set */
# Line 5939  defined (though never set). So there's n Line 6008  defined (though never set). So there's n
6008  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
6009    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
6010    
 /* Check a UTF-8 string if required. Pass back the character offset and error  
 code for an invalid string if a results vector is available. */  
   
 #ifdef SUPPORT_UTF8  
 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  
   {  
   int erroroffset;  
   int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);  
   if (errorcode != 0)  
     {  
     if (offsetcount >= 2)  
       {  
       offsets[0] = erroroffset;  
       offsets[1] = errorcode;  
       }  
     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?  
       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;  
     }  
   
   /* Check that a start_offset points to the start of a UTF-8 character. */  
   
   if (start_offset > 0 && start_offset < length &&  
       (((USPTR)subject)[start_offset] & 0xc0) == 0x80)  
     return PCRE_ERROR_BADUTF8_OFFSET;  
   }  
 #endif  
   
6011  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can
6012  hold, we get a temporary chunk of working store to use during the matching.  hold, we get a temporary chunk of working store to use during the matching.
6013  Otherwise, we can use the vector supplied, rounding down its size to a multiple  Otherwise, we can use the vector supplied, rounding down its size to a multiple
6014  of 3. */  of 3. */
6015    
6016  ocount = offsetcount - (offsetcount % 3);  ocount = offsetcount - (offsetcount % 3);
6017    arg_offset_max = (2*ocount)/3;
6018    
6019  if (re->top_backref > 0 && re->top_backref >= ocount/3)  if (re->top_backref > 0 && re->top_backref >= ocount/3)
6020    {    {
# Line 6346  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6389  if (rc == MATCH_MATCH || rc == MATCH_ACC
6389    {    {
6390    if (using_temporary_offsets)    if (using_temporary_offsets)
6391      {      {
6392      if (offsetcount >= 4)      if (arg_offset_max >= 4)
6393        {        {
6394        memcpy(offsets + 2, md->offset_vector + 2,        memcpy(offsets + 2, md->offset_vector + 2,
6395          (offsetcount - 2) * sizeof(int));          (arg_offset_max - 2) * sizeof(int));
6396        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
6397        }        }
6398      if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;      if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
6399      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
6400      (pcre_free)(md->offset_vector);      (pcre_free)(md->offset_vector);
6401      }      }
6402    
6403    /* Set the return code to the number of captured strings, or 0 if there are    /* Set the return code to the number of captured strings, or 0 if there were
6404    too many to fit into the vector. */    too many to fit into the vector. */
6405    
6406    rc = md->offset_overflow? 0 : md->end_offset_top/2;    rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
6407        0 : md->end_offset_top/2;
6408    
6409    /* If there is space in the offset vector, set any unused pairs at the end of    /* If there is space in the offset vector, set any unused pairs at the end of
6410    the pattern to -1 for backwards compatibility. It is documented that this    the pattern to -1 for backwards compatibility. It is documented that this
6411    happens. In earlier versions, the whole set of potential capturing offsets    happens. In earlier versions, the whole set of potential capturing offsets
6412    was set to -1 each time round the loop, but this is handled differently now.    was set to -1 each time round the loop, but this is handled differently now.
6413    "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only    "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
6414    those at the end that need unsetting here. We can't just unset them all at    those at the end that need unsetting here. We can't just unset them all at
6415    the start of the whole thing because they may get set in one branch that is    the start of the whole thing because they may get set in one branch that is
6416    not the final matching branch. */    not the final matching branch. */

Legend:
Removed from v.626  
changed lines
  Added in v.667

  ViewVC Help
Powered by ViewVC 1.1.5