/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 654 by ph10, Tue Aug 2 11:00:40 2011 UTC | revision 667 by ph10, Mon Aug 22 14:57:32 2011 UTC
# Line 5761  pcre_exec(const pcre *argument_re, const Line 5761  pcre_exec(const pcre *argument_re, const
5761    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
5762    int offsetcount)    int offsetcount)
5763  {  {
5764  int rc, ocount;  int rc, ocount, arg_offset_max;
5765  int first_byte = -1;  int first_byte = -1;
5766  int req_byte = -1;  int req_byte = -1;
5767  int req_byte2 = -1;  int req_byte2 = -1;
# Line 5797  if (re == NULL || subject == NULL || Line 5797  if (re == NULL || subject == NULL ||
5797  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5798  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
5799    
5800  /* This information is for finding all the numbers associated with a given  /* These two settings are used in the code for checking a UTF-8 string that
5801  name, for condition testing. */  follows immediately afterwards. Other values in the md block are used only
5802    during "normal" pcre_exec() processing, not when the JIT support is in use,
5803    so they are set up later. */
5804    
5805    utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
5806    md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
5807                  ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
5808    
5809    /* Check a UTF-8 string if required. Pass back the character offset and error
5810    code for an invalid string if a results vector is available. */
5811    
5812    #ifdef SUPPORT_UTF8
5813    if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5814      {
5815      int erroroffset;
5816      int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);
5817      if (errorcode != 0)
5818        {
5819        if (offsetcount >= 2)
5820          {
5821          offsets[0] = erroroffset;
5822          offsets[1] = errorcode;
5823          }
5824        return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
5825          PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
5826        }
5827    
5828      /* Check that a start_offset points to the start of a UTF-8 character. */
5829      if (start_offset > 0 && start_offset < length &&
5830          (((USPTR)subject)[start_offset] & 0xc0) == 0x80)
5831        return PCRE_ERROR_BADUTF8_OFFSET;
5832      }
5833    #endif
5834    
5835    /* If the pattern was successfully studied with JIT support, run the JIT
5836    executable instead of the rest of this function. Most options must be set at
5837    compile time for the JIT code to be usable. Fallback to the normal code path if
5838    an unsupported flag is set. In particular, JIT does not support partial
5839    matching. */
5840    
5841    #ifdef SUPPORT_JIT
5842    if (extra_data != NULL
5843        && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
5844        && extra_data->executable_jit != NULL
5845        && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
5846                        PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)
5847      return _pcre_jit_exec(re, extra_data->executable_jit, subject, length,
5848        start_offset, options, offsets, offsetcount);
5849    #endif
5850    
5851    /* Carry on with non-JIT matching. This information is for finding all the
5852    numbers associated with a given name, for condition testing. */
5853    
5854  md->name_table = (uschar *)re + re->name_table_offset;  md->name_table = (uschar *)re + re->name_table_offset;
5855  md->name_count = re->name_count;  md->name_count = re->name_count;
# Line 5865  md->end_subject = md->start_subject + le Line 5916  md->end_subject = md->start_subject + le
5916  end_subject = md->end_subject;  end_subject = md->end_subject;
5917    
5918  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  
5919  md->use_ucp = (re->options & PCRE_UCP) != 0;  md->use_ucp = (re->options & PCRE_UCP) != 0;
5920  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5921    
# Line 5876  md->notbol = (options & PCRE_NOTBOL) != Line 5926  md->notbol = (options & PCRE_NOTBOL) !=
5926  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
5927  md->notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
5928  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :  
               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;  
   
5929    
5930  md->hitend = FALSE;  md->hitend = FALSE;
5931  md->mark = NULL;                        /* In case never set */  md->mark = NULL;                        /* In case never set */
# Line 5961  defined (though never set). So there's n Line 6008  defined (though never set). So there's n
6008  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
6009    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
6010    
 /* Check a UTF-8 string if required. Pass back the character offset and error  
 code for an invalid string if a results vector is available. */  
   
 #ifdef SUPPORT_UTF8  
 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  
   {  
   int erroroffset;  
   int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);  
   if (errorcode != 0)  
     {  
     if (offsetcount >= 2)  
       {  
       offsets[0] = erroroffset;  
       offsets[1] = errorcode;  
       }  
     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?  
       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;  
     }  
   
   /* Check that a start_offset points to the start of a UTF-8 character. */  
   
   if (start_offset > 0 && start_offset < length &&  
       (((USPTR)subject)[start_offset] & 0xc0) == 0x80)  
     return PCRE_ERROR_BADUTF8_OFFSET;  
   }  
 #endif  
   
6011  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can
6012  hold, we get a temporary chunk of working store to use during the matching.  hold, we get a temporary chunk of working store to use during the matching.
6013  Otherwise, we can use the vector supplied, rounding down its size to a multiple  Otherwise, we can use the vector supplied, rounding down its size to a multiple
6014  of 3. */  of 3. */
6015    
6016  ocount = offsetcount - (offsetcount % 3);  ocount = offsetcount - (offsetcount % 3);
6017    arg_offset_max = (2*ocount)/3;
6018    
6019  if (re->top_backref > 0 && re->top_backref >= ocount/3)  if (re->top_backref > 0 && re->top_backref >= ocount/3)
6020    {    {
# Line 6368  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6389  if (rc == MATCH_MATCH || rc == MATCH_ACC
6389    {    {
6390    if (using_temporary_offsets)    if (using_temporary_offsets)
6391      {      {
6392      if (offsetcount >= 4)      if (arg_offset_max >= 4)
6393        {        {
6394        memcpy(offsets + 2, md->offset_vector + 2,        memcpy(offsets + 2, md->offset_vector + 2,
6395          (offsetcount - 2) * sizeof(int));          (arg_offset_max - 2) * sizeof(int));
6396        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
6397        }        }
6398      if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;      if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
6399      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
6400      (pcre_free)(md->offset_vector);      (pcre_free)(md->offset_vector);
6401      }      }
6402    
6403    /* Set the return code to the number of captured strings, or 0 if there are    /* Set the return code to the number of captured strings, or 0 if there were
6404    too many to fit into the vector. */    too many to fit into the vector. */
6405    
6406    rc = md->offset_overflow? 0 : md->end_offset_top/2;    rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
6407        0 : md->end_offset_top/2;
6408    
6409    /* If there is space in the offset vector, set any unused pairs at the end of    /* If there is space in the offset vector, set any unused pairs at the end of
6410    the pattern to -1 for backwards compatibility. It is documented that this    the pattern to -1 for backwards compatibility. It is documented that this

Legend:
Removed from v.654  
changed lines
  Added in v.667

  ViewVC Help
Powered by ViewVC 1.1.5