/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

Revision 630 by ph10, Fri Jul 22 10:00:10 2011 UTC | Revision 691 by ph10, Sun Sep 11 14:31:21 2011 UTC
# Line 1070  for (;;) Line 1070  for (;;)
1070        if (pcre_callout != NULL)        if (pcre_callout != NULL)
1071          {          {
1072          pcre_callout_block cb;          pcre_callout_block cb;
1073          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 2;   /* Version 1 of the callout block */
1074          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
1075          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
1076          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
# Line 1082  for (;;) Line 1082  for (;;)
1082          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
1083          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last;
1084          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1085            cb.mark             = markptr;
1086          if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);          if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1087          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1088          }          }
# Line 1464  for (;;) Line 1465  for (;;)
1465      if (pcre_callout != NULL)      if (pcre_callout != NULL)
1466        {        {
1467        pcre_callout_block cb;        pcre_callout_block cb;
1468        cb.version          = 1;   /* Version 1 of the callout block */        cb.version          = 2;   /* Version 1 of the callout block */
1469        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1470        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1471        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
# Line 1476  for (;;) Line 1477  for (;;)
1477        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1478        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1479        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1480          cb.mark             = markptr;
1481        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1482        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1483        }        }
# Line 1501  for (;;) Line 1503  for (;;)
1503    
1504      case OP_RECURSE:      case OP_RECURSE:
1505        {        {
1506          recursion_info *ri;
1507          int recno;
1508    
1509        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1510        new_recursive.group_num = (callpat == md->start_code)? 0 :        recno = (callpat == md->start_code)? 0 :
1511          GET2(callpat, 1 + LINK_SIZE);          GET2(callpat, 1 + LINK_SIZE);
1512    
1513          /* Check for repeating a recursion without advancing the subject pointer.
1514          This should catch convoluted mutual recursions. (Some simple cases are
1515          caught at compile time.) */
1516    
1517          for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1518            if (recno == ri->group_num && eptr == ri->subject_position)
1519              RRETURN(PCRE_ERROR_RECURSELOOP);
1520    
1521        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1522    
1523          new_recursive.group_num = recno;
1524          new_recursive.subject_position = eptr;
1525        new_recursive.prevrec = md->recursive;        new_recursive.prevrec = md->recursive;
1526        md->recursive = &new_recursive;        md->recursive = &new_recursive;
1527    
# Line 1541  for (;;) Line 1556  for (;;)
1556            md, eptrb, RM6);            md, eptrb, RM6);
1557          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
1558              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1559            md->recursive = new_recursive.prevrec;
1560          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1561            {            {
1562            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
           md->recursive = new_recursive.prevrec;  
1563            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1564              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1565    
# Line 1999  for (;;) Line 2014  for (;;)
2014      /* Fall through */      /* Fall through */
2015    
2016      case OP_ALLANY:      case OP_ALLANY:
2017      if (eptr++ >= md->end_subject)      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2018        {        {                            /* not be updated before SCHECK_PARTIAL. */
2019        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2020        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
2021        }        }
2022        eptr++;
2023      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2024      ecode++;      ecode++;
2025      break;      break;
# Line 2012  for (;;) Line 2028  for (;;)
2028      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
2029    
2030      case OP_ANYBYTE:      case OP_ANYBYTE:
2031      if (eptr++ >= md->end_subject)      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2032        {        {                            /* not be updated before SCHECK_PARTIAL. */
2033        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2034        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
2035        }        }
2036        eptr++;
2037      ecode++;      ecode++;
2038      break;      break;
2039    
# Line 5165  for (;;) Line 5182  for (;;)
5182                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5183                }                }
5184              }              }
5185            else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */            else
5186                {
5187                eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
5188                SCHECK_PARTIAL();
5189                }
5190            break;            break;
5191    
5192            /* The byte case is the same as non-UTF8 */            /* The byte case is the same as non-UTF8 */
# Line 5740  pcre_exec(const pcre *argument_re, const Line 5761  pcre_exec(const pcre *argument_re, const
5761    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
5762    int offsetcount)    int offsetcount)
5763  {  {
5764  int rc, ocount;  int rc, ocount, arg_offset_max;
5765  int first_byte = -1;  int first_byte = -1;
5766  int req_byte = -1;  int req_byte = -1;
5767  int req_byte2 = -1;  int req_byte2 = -1;
# Line 5776  if (re == NULL || subject == NULL || Line 5797  if (re == NULL || subject == NULL ||
5797  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5798  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
5799    
5800  /* This information is for finding all the numbers associated with a given  /* These two settings are used in the code for checking a UTF-8 string that
5801  name, for condition testing. */  follows immediately afterwards. Other values in the md block are used only
5802    during "normal" pcre_exec() processing, not when the JIT support is in use,
5803    so they are set up later. */
5804    
5805    utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
5806    md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
5807                  ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
5808    
5809    /* Check a UTF-8 string if required. Pass back the character offset and error
5810    code for an invalid string if a results vector is available. */
5811    
5812    #ifdef SUPPORT_UTF8
5813    if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5814      {
5815      int erroroffset;
5816      int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);
5817      if (errorcode != 0)
5818        {
5819        if (offsetcount >= 2)
5820          {
5821          offsets[0] = erroroffset;
5822          offsets[1] = errorcode;
5823          }
5824        return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
5825          PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
5826        }
5827    
5828      /* Check that a start_offset points to the start of a UTF-8 character. */
5829      if (start_offset > 0 && start_offset < length &&
5830          (((USPTR)subject)[start_offset] & 0xc0) == 0x80)
5831        return PCRE_ERROR_BADUTF8_OFFSET;
5832      }
5833    #endif
5834    
5835    /* If the pattern was successfully studied with JIT support, run the JIT
5836    executable instead of the rest of this function. Most options must be set at
5837    compile time for the JIT code to be usable. Fallback to the normal code path if
5838    an unsupported flag is set. In particular, JIT does not support partial
5839    matching. */
5840    
5841    #ifdef SUPPORT_JIT
5842    if (extra_data != NULL
5843        && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
5844        && extra_data->executable_jit != NULL
5845        && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
5846                        PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)
5847      return _pcre_jit_exec(re, extra_data->executable_jit, subject, length,
5848        start_offset, options, ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
5849        ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
5850    #endif
5851    
5852    /* Carry on with non-JIT matching. This information is for finding all the
5853    numbers associated with a given name, for condition testing. */
5854    
5855  md->name_table = (uschar *)re + re->name_table_offset;  md->name_table = (uschar *)re + re->name_table_offset;
5856  md->name_count = re->name_count;  md->name_count = re->name_count;
# Line 5844  md->end_subject = md->start_subject + le Line 5917  md->end_subject = md->start_subject + le
5917  end_subject = md->end_subject;  end_subject = md->end_subject;
5918    
5919  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  
5920  md->use_ucp = (re->options & PCRE_UCP) != 0;  md->use_ucp = (re->options & PCRE_UCP) != 0;
5921  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5922    
# Line 5855  md->notbol = (options & PCRE_NOTBOL) != Line 5927  md->notbol = (options & PCRE_NOTBOL) !=
5927  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
5928  md->notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
5929  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :  
               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;  
   
5930    
5931  md->hitend = FALSE;  md->hitend = FALSE;
5932  md->mark = NULL;                        /* In case never set */  md->mark = NULL;                        /* In case never set */
# Line 5940  defined (though never set). So there's n Line 6009  defined (though never set). So there's n
6009  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
6010    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
6011    
 /* Check a UTF-8 string if required. Pass back the character offset and error  
 code for an invalid string if a results vector is available. */  
   
 #ifdef SUPPORT_UTF8  
 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  
   {  
   int erroroffset;  
   int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);  
   if (errorcode != 0)  
     {  
     if (offsetcount >= 2)  
       {  
       offsets[0] = erroroffset;  
       offsets[1] = errorcode;  
       }  
     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?  
       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;  
     }  
   
   /* Check that a start_offset points to the start of a UTF-8 character. */  
   
   if (start_offset > 0 && start_offset < length &&  
       (((USPTR)subject)[start_offset] & 0xc0) == 0x80)  
     return PCRE_ERROR_BADUTF8_OFFSET;  
   }  
 #endif  
   
6012  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can
6013  hold, we get a temporary chunk of working store to use during the matching.  hold, we get a temporary chunk of working store to use during the matching.
6014  Otherwise, we can use the vector supplied, rounding down its size to a multiple  Otherwise, we can use the vector supplied, rounding down its size to a multiple
6015  of 3. */  of 3. */
6016    
6017  ocount = offsetcount - (offsetcount % 3);  ocount = offsetcount - (offsetcount % 3);
6018    arg_offset_max = (2*ocount)/3;
6019    
6020  if (re->top_backref > 0 && re->top_backref >= ocount/3)  if (re->top_backref > 0 && re->top_backref >= ocount/3)
6021    {    {
# Line 6347  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6390  if (rc == MATCH_MATCH || rc == MATCH_ACC
6390    {    {
6391    if (using_temporary_offsets)    if (using_temporary_offsets)
6392      {      {
6393      if (offsetcount >= 4)      if (arg_offset_max >= 4)
6394        {        {
6395        memcpy(offsets + 2, md->offset_vector + 2,        memcpy(offsets + 2, md->offset_vector + 2,
6396          (offsetcount - 2) * sizeof(int));          (arg_offset_max - 2) * sizeof(int));
6397        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
6398        }        }
6399      if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;      if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
6400      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
6401      (pcre_free)(md->offset_vector);      (pcre_free)(md->offset_vector);
6402      }      }
6403    
6404    /* Set the return code to the number of captured strings, or 0 if there are    /* Set the return code to the number of captured strings, or 0 if there were
6405    too many to fit into the vector. */    too many to fit into the vector. */
6406    
6407    rc = md->offset_overflow? 0 : md->end_offset_top/2;    rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
6408        0 : md->end_offset_top/2;
6409    
6410    /* If there is space in the offset vector, set any unused pairs at the end of    /* If there is space in the offset vector, set any unused pairs at the end of
6411    the pattern to -1 for backwards compatibility. It is documented that this    the pattern to -1 for backwards compatibility. It is documented that this
6412    happens. In earlier versions, the whole set of potential capturing offsets    happens. In earlier versions, the whole set of potential capturing offsets
6413    was set to -1 each time round the loop, but this is handled differently now.    was set to -1 each time round the loop, but this is handled differently now.
6414    "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only    "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
6415    those at the end that need unsetting here. We can't just unset them all at    those at the end that need unsetting here. We can't just unset them all at
6416    the start of the whole thing because they may get set in one branch that is    the start of the whole thing because they may get set in one branch that is
6417    not the final matching branch. */    not the final matching branch. */

Legend:
Removed from v.630  
changed lines
  Added in v.691

  ViewVC Help
Powered by ViewVC 1.1.5