/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 645 by ph10, Sun Jul 31 17:02:18 2011 UTC revision 667 by ph10, Mon Aug 22 14:57:32 2011 UTC
# Line 1082  for (;;) Line 1082  for (;;)
1082          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
1083          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last;
1084          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1085          cb.mark             = markptr;          cb.mark             = markptr;
1086          if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);          if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1087          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1088          }          }
# Line 1477  for (;;) Line 1477  for (;;)
1477        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1478        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1479        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1480        cb.mark             = markptr;        cb.mark             = markptr;
1481        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1482        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1483        }        }
# Line 1505  for (;;) Line 1505  for (;;)
1505        {        {
1506        recursion_info *ri;        recursion_info *ri;
1507        int recno;        int recno;
1508    
1509        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1510        recno = (callpat == md->start_code)? 0 :        recno = (callpat == md->start_code)? 0 :
1511          GET2(callpat, 1 + LINK_SIZE);          GET2(callpat, 1 + LINK_SIZE);
1512    
1513        /* Check for repeating a recursion without advancing the subject pointer.        /* Check for repeating a recursion without advancing the subject pointer.
1514        This should catch convoluted mutual recursions. (Some simple cases are        This should catch convoluted mutual recursions. (Some simple cases are
1515        caught at compile time.) */        caught at compile time.) */
1516    
1517        for (ri = md->recursive; ri != NULL; ri = ri->prevrec)        for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1518          if (recno == ri->group_num && eptr == ri->subject_position)          if (recno == ri->group_num && eptr == ri->subject_position)
1519            RRETURN(PCRE_ERROR_RECURSELOOP);            RRETURN(PCRE_ERROR_RECURSELOOP);
1520    
1521        /* Add to "recursing stack" */        /* Add to "recursing stack" */
# Line 2014  for (;;) Line 2014  for (;;)
2014      /* Fall through */      /* Fall through */
2015    
2016      case OP_ALLANY:      case OP_ALLANY:
2017      if (eptr++ >= md->end_subject)      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2018        {        {                            /* not be updated before SCHECK_PARTIAL. */
2019        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2020        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
2021        }        }
2022        eptr++;
2023      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2024      ecode++;      ecode++;
2025      break;      break;
# Line 2027  for (;;) Line 2028  for (;;)
2028      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
2029    
2030      case OP_ANYBYTE:      case OP_ANYBYTE:
2031      if (eptr++ >= md->end_subject)      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2032        {        {                            /* not be updated before SCHECK_PARTIAL. */
2033        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2034        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
2035        }        }
2036        eptr++;
2037      ecode++;      ecode++;
2038      break;      break;
2039    
# Line 5180  for (;;) Line 5182  for (;;)
5182                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5183                }                }
5184              }              }
5185            else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */            else
5186                {
5187                eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
5188                SCHECK_PARTIAL();
5189                }
5190            break;            break;
5191    
5192            /* The byte case is the same as non-UTF8 */            /* The byte case is the same as non-UTF8 */
# Line 5755  pcre_exec(const pcre *argument_re, const Line 5761  pcre_exec(const pcre *argument_re, const
5761    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
5762    int offsetcount)    int offsetcount)
5763  {  {
5764  int rc, ocount;  int rc, ocount, arg_offset_max;
5765  int first_byte = -1;  int first_byte = -1;
5766  int req_byte = -1;  int req_byte = -1;
5767  int req_byte2 = -1;  int req_byte2 = -1;
# Line 5791  if (re == NULL || subject == NULL || Line 5797  if (re == NULL || subject == NULL ||
5797  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5798  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
5799    
5800  /* This information is for finding all the numbers associated with a given  /* These two settings are used in the code for checking a UTF-8 string that
5801  name, for condition testing. */  follows immediately afterwards. Other values in the md block are used only
5802    during "normal" pcre_exec() processing, not when the JIT support is in use,
5803    so they are set up later. */
5804    
5805    utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
5806    md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
5807                  ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
5808    
5809    /* Check a UTF-8 string if required. Pass back the character offset and error
5810    code for an invalid string if a results vector is available. */
5811    
5812    #ifdef SUPPORT_UTF8
5813    if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5814      {
5815      int erroroffset;
5816      int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);
5817      if (errorcode != 0)
5818        {
5819        if (offsetcount >= 2)
5820          {
5821          offsets[0] = erroroffset;
5822          offsets[1] = errorcode;
5823          }
5824        return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
5825          PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
5826        }
5827    
5828      /* Check that a start_offset points to the start of a UTF-8 character. */
5829      if (start_offset > 0 && start_offset < length &&
5830          (((USPTR)subject)[start_offset] & 0xc0) == 0x80)
5831        return PCRE_ERROR_BADUTF8_OFFSET;
5832      }
5833    #endif
5834    
5835    /* If the pattern was successfully studied with JIT support, run the JIT
5836    executable instead of the rest of this function. Most options must be set at
5837    compile time for the JIT code to be usable. Fall back to the normal code path if
5838    an unsupported flag is set. In particular, JIT does not support partial
5839    matching. */
5840    
5841    #ifdef SUPPORT_JIT
5842    if (extra_data != NULL
5843        && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
5844        && extra_data->executable_jit != NULL
5845        && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
5846                        PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)
5847      return _pcre_jit_exec(re, extra_data->executable_jit, subject, length,
5848        start_offset, options, offsets, offsetcount);
5849    #endif
5850    
5851    /* Carry on with non-JIT matching. This information is for finding all the
5852    numbers associated with a given name, for condition testing. */
5853    
5854  md->name_table = (uschar *)re + re->name_table_offset;  md->name_table = (uschar *)re + re->name_table_offset;
5855  md->name_count = re->name_count;  md->name_count = re->name_count;
# Line 5859  md->end_subject = md->start_subject + le Line 5916  md->end_subject = md->start_subject + le
5916  end_subject = md->end_subject;  end_subject = md->end_subject;
5917    
5918  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  
5919  md->use_ucp = (re->options & PCRE_UCP) != 0;  md->use_ucp = (re->options & PCRE_UCP) != 0;
5920  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5921    
# Line 5870  md->notbol = (options & PCRE_NOTBOL) != Line 5926  md->notbol = (options & PCRE_NOTBOL) !=
5926  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
5927  md->notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
5928  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :  
               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;  
   
5929    
5930  md->hitend = FALSE;  md->hitend = FALSE;
5931  md->mark = NULL;                        /* In case never set */  md->mark = NULL;                        /* In case never set */
# Line 5955  defined (though never set). So there's n Line 6008  defined (though never set). So there's n
6008  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
6009    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
6010    
 /* Check a UTF-8 string if required. Pass back the character offset and error  
 code for an invalid string if a results vector is available. */  
   
 #ifdef SUPPORT_UTF8  
 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  
   {  
   int erroroffset;  
   int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);  
   if (errorcode != 0)  
     {  
     if (offsetcount >= 2)  
       {  
       offsets[0] = erroroffset;  
       offsets[1] = errorcode;  
       }  
     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?  
       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;  
     }  
   
   /* Check that a start_offset points to the start of a UTF-8 character. */  
   
   if (start_offset > 0 && start_offset < length &&  
       (((USPTR)subject)[start_offset] & 0xc0) == 0x80)  
     return PCRE_ERROR_BADUTF8_OFFSET;  
   }  
 #endif  
   
6011  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can
6012  hold, we get a temporary chunk of working store to use during the matching.  hold, we get a temporary chunk of working store to use during the matching.
6013  Otherwise, we can use the vector supplied, rounding down its size to a multiple  Otherwise, we can use the vector supplied, rounding down its size to a multiple
6014  of 3. */  of 3. */
6015    
6016  ocount = offsetcount - (offsetcount % 3);  ocount = offsetcount - (offsetcount % 3);
6017    arg_offset_max = (2*ocount)/3;
6018    
6019  if (re->top_backref > 0 && re->top_backref >= ocount/3)  if (re->top_backref > 0 && re->top_backref >= ocount/3)
6020    {    {
# Line 6362  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6389  if (rc == MATCH_MATCH || rc == MATCH_ACC
6389    {    {
6390    if (using_temporary_offsets)    if (using_temporary_offsets)
6391      {      {
6392      if (offsetcount >= 4)      if (arg_offset_max >= 4)
6393        {        {
6394        memcpy(offsets + 2, md->offset_vector + 2,        memcpy(offsets + 2, md->offset_vector + 2,
6395          (offsetcount - 2) * sizeof(int));          (arg_offset_max - 2) * sizeof(int));
6396        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
6397        }        }
6398      if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;      if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
6399      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
6400      (pcre_free)(md->offset_vector);      (pcre_free)(md->offset_vector);
6401      }      }
6402    
6403    /* Set the return code to the number of captured strings, or 0 if there are    /* Set the return code to the number of captured strings, or 0 if there were
6404    too many to fit into the vector. */    too many to fit into the vector. */
6405    
6406    rc = md->offset_overflow? 0 : md->end_offset_top/2;    rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
6407        0 : md->end_offset_top/2;
6408    
6409    /* If there is space in the offset vector, set any unused pairs at the end of    /* If there is space in the offset vector, set any unused pairs at the end of
6410    the pattern to -1 for backwards compatibility. It is documented that this    the pattern to -1 for backwards compatibility. It is documented that this
6411    happens. In earlier versions, the whole set of potential capturing offsets    happens. In earlier versions, the whole set of potential capturing offsets
6412    was set to -1 each time round the loop, but this is handled differently now.    was set to -1 each time round the loop, but this is handled differently now.
6413    "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only    "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
6414    those at the end that need unsetting here. We can't just unset them all at    those at the end that need unsetting here. We can't just unset them all at
6415    the start of the whole thing because they may get set in one branch that is    the start of the whole thing because they may get set in one branch that is
6416    not the final matching branch. */    not the final matching branch. */

Legend:
Removed from v.645  
changed lines
  Added in v.667

  ViewVC Help
Powered by ViewVC 1.1.5