/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 648 by ph10, Mon Aug 1 11:02:08 2011 UTC | revision 677 by ph10, Sun Aug 28 10:50:07 2011 UTC
# Line 1082  for (;;) Line 1082  for (;;)
1082          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
1083          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last;
1084          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1085          cb.mark             = markptr;          cb.mark             = markptr;
1086          if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);          if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1087          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1088          }          }
# Line 1477  for (;;) Line 1477  for (;;)
1477        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1478        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1479        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1480        cb.mark             = markptr;        cb.mark             = markptr;
1481        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1482        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1483        }        }
# Line 1505  for (;;) Line 1505  for (;;)
1505        {        {
1506        recursion_info *ri;        recursion_info *ri;
1507        int recno;        int recno;
1508    
1509        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1510        recno = (callpat == md->start_code)? 0 :        recno = (callpat == md->start_code)? 0 :
1511          GET2(callpat, 1 + LINK_SIZE);          GET2(callpat, 1 + LINK_SIZE);
1512    
1513        /* Check for repeating a recursion without advancing the subject pointer.        /* Check for repeating a recursion without advancing the subject pointer.
1514        This should catch convoluted mutual recursions. (Some simple cases are        This should catch convoluted mutual recursions. (Some simple cases are
1515        caught at compile time.) */        caught at compile time.) */
1516    
1517        for (ri = md->recursive; ri != NULL; ri = ri->prevrec)        for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1518          if (recno == ri->group_num && eptr == ri->subject_position)          if (recno == ri->group_num && eptr == ri->subject_position)
1519            RRETURN(PCRE_ERROR_RECURSELOOP);            RRETURN(PCRE_ERROR_RECURSELOOP);
1520    
1521        /* Add to "recursing stack" */        /* Add to "recursing stack" */
# Line 2033  for (;;) Line 2033  for (;;)
2033        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2034        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
2035        }        }
2036      eptr++;      eptr++;
2037      ecode++;      ecode++;
2038      break;      break;
2039    
# Line 5182  for (;;) Line 5182  for (;;)
5182                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5183                }                }
5184              }              }
5185            else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */            else
5186                {
5187                eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
5188                SCHECK_PARTIAL();
5189                }
5190            break;            break;
5191    
5192            /* The byte case is the same as non-UTF8 */            /* The byte case is the same as non-UTF8 */
# Line 5757  pcre_exec(const pcre *argument_re, const Line 5761  pcre_exec(const pcre *argument_re, const
5761    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
5762    int offsetcount)    int offsetcount)
5763  {  {
5764  int rc, ocount;  int rc, ocount, arg_offset_max;
5765  int first_byte = -1;  int first_byte = -1;
5766  int req_byte = -1;  int req_byte = -1;
5767  int req_byte2 = -1;  int req_byte2 = -1;
# Line 5793  if (re == NULL || subject == NULL || Line 5797  if (re == NULL || subject == NULL ||
5797  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5798  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
5799    
5800  /* This information is for finding all the numbers associated with a given  /* These two settings are used in the code for checking a UTF-8 string that
5801  name, for condition testing. */  follows immediately afterwards. Other values in the md block are used only
5802    during "normal" pcre_exec() processing, not when the JIT support is in use,
5803    so they are set up later. */
5804    
5805    utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
5806    md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
5807                  ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
5808    
5809    /* Check a UTF-8 string if required. Pass back the character offset and error
5810    code for an invalid string if a results vector is available. */
5811    
5812    #ifdef SUPPORT_UTF8
5813    if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5814      {
5815      int erroroffset;
5816      int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);
5817      if (errorcode != 0)
5818        {
5819        if (offsetcount >= 2)
5820          {
5821          offsets[0] = erroroffset;
5822          offsets[1] = errorcode;
5823          }
5824        return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
5825          PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
5826        }
5827    
5828      /* Check that a start_offset points to the start of a UTF-8 character. */
5829      if (start_offset > 0 && start_offset < length &&
5830          (((USPTR)subject)[start_offset] & 0xc0) == 0x80)
5831        return PCRE_ERROR_BADUTF8_OFFSET;
5832      }
5833    #endif
5834    
5835    /* If the pattern was successfully studied with JIT support, run the JIT
5836    executable instead of the rest of this function. Most options must be set at
5837    compile time for the JIT code to be usable. Fallback to the normal code path if
5838    an unsupported flag is set. In particular, JIT does not support partial
5839    matching. */
5840    
5841    #ifdef SUPPORT_JIT
5842    if (extra_data != NULL
5843        && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
5844        && extra_data->executable_jit != NULL
5845        && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
5846                        PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)
5847      return _pcre_jit_exec(re, extra_data->executable_jit, subject, length,
5848        start_offset, options, ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
5849        ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
5850    #endif
5851    
5852    /* Carry on with non-JIT matching. This information is for finding all the
5853    numbers associated with a given name, for condition testing. */
5854    
5855  md->name_table = (uschar *)re + re->name_table_offset;  md->name_table = (uschar *)re + re->name_table_offset;
5856  md->name_count = re->name_count;  md->name_count = re->name_count;
# Line 5861  md->end_subject = md->start_subject + le Line 5917  md->end_subject = md->start_subject + le
5917  end_subject = md->end_subject;  end_subject = md->end_subject;
5918    
5919  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  
5920  md->use_ucp = (re->options & PCRE_UCP) != 0;  md->use_ucp = (re->options & PCRE_UCP) != 0;
5921  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5922    
# Line 5872  md->notbol = (options & PCRE_NOTBOL) != Line 5927  md->notbol = (options & PCRE_NOTBOL) !=
5927  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
5928  md->notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
5929  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :  
               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;  
   
5930    
5931  md->hitend = FALSE;  md->hitend = FALSE;
5932  md->mark = NULL;                        /* In case never set */  md->mark = NULL;                        /* In case never set */
# Line 5957  defined (though never set). So there's n Line 6009  defined (though never set). So there's n
6009  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
6010    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
6011    
 /* Check a UTF-8 string if required. Pass back the character offset and error  
 code for an invalid string if a results vector is available. */  
   
 #ifdef SUPPORT_UTF8  
 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  
   {  
   int erroroffset;  
   int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);  
   if (errorcode != 0)  
     {  
     if (offsetcount >= 2)  
       {  
       offsets[0] = erroroffset;  
       offsets[1] = errorcode;  
       }  
     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?  
       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;  
     }  
   
   /* Check that a start_offset points to the start of a UTF-8 character. */  
   
   if (start_offset > 0 && start_offset < length &&  
       (((USPTR)subject)[start_offset] & 0xc0) == 0x80)  
     return PCRE_ERROR_BADUTF8_OFFSET;  
   }  
 #endif  
   
6012  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can
6013  hold, we get a temporary chunk of working store to use during the matching.  hold, we get a temporary chunk of working store to use during the matching.
6014  Otherwise, we can use the vector supplied, rounding down its size to a multiple  Otherwise, we can use the vector supplied, rounding down its size to a multiple
6015  of 3. */  of 3. */
6016    
6017  ocount = offsetcount - (offsetcount % 3);  ocount = offsetcount - (offsetcount % 3);
6018    arg_offset_max = (2*ocount)/3;
6019    
6020  if (re->top_backref > 0 && re->top_backref >= ocount/3)  if (re->top_backref > 0 && re->top_backref >= ocount/3)
6021    {    {
# Line 6364  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6390  if (rc == MATCH_MATCH || rc == MATCH_ACC
6390    {    {
6391    if (using_temporary_offsets)    if (using_temporary_offsets)
6392      {      {
6393      if (offsetcount >= 4)      if (arg_offset_max >= 4)
6394        {        {
6395        memcpy(offsets + 2, md->offset_vector + 2,        memcpy(offsets + 2, md->offset_vector + 2,
6396          (offsetcount - 2) * sizeof(int));          (arg_offset_max - 2) * sizeof(int));
6397        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
6398        }        }
6399      if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;      if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
6400      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
6401      (pcre_free)(md->offset_vector);      (pcre_free)(md->offset_vector);
6402      }      }
6403    
6404    /* Set the return code to the number of captured strings, or 0 if there are    /* Set the return code to the number of captured strings, or 0 if there were
6405    too many to fit into the vector. */    too many to fit into the vector. */
6406    
6407    rc = md->offset_overflow? 0 : md->end_offset_top/2;    rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
6408        0 : md->end_offset_top/2;
6409    
6410    /* If there is space in the offset vector, set any unused pairs at the end of    /* If there is space in the offset vector, set any unused pairs at the end of
6411    the pattern to -1 for backwards compatibility. It is documented that this    the pattern to -1 for backwards compatibility. It is documented that this
6412    happens. In earlier versions, the whole set of potential capturing offsets    happens. In earlier versions, the whole set of potential capturing offsets
6413    was set to -1 each time round the loop, but this is handled differently now.    was set to -1 each time round the loop, but this is handled differently now.
6414    "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only    "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
6415    those at the end that need unsetting here. We can't just unset them all at    those at the end that need unsetting here. We can't just unset them all at
6416    the start of the whole thing because they may get set in one branch that is    the start of the whole thing because they may get set in one branch that is
6417    not the final matching branch. */    not the final matching branch. */

Legend:
Removed from v.648  
changed lines
  Added in v.677

  ViewVC Help
Powered by ViewVC 1.1.5