/[pcre]/code/branches/pcre16/pcre_exec.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 770 by zherczeg, Mon Nov 28 20:39:30 2011 UTC | revision 774 by zherczeg, Thu Dec 1 06:08:45 2011 UTC
# Line 5964  pcre16_exec(const pcre *argument_re, con Line 5964  pcre16_exec(const pcre *argument_re, con
5964  #endif  #endif
5965  {  {
5966  int rc, ocount, arg_offset_max;  int rc, ocount, arg_offset_max;
 int first_byte = -1;  
 int req_byte = -1;  
 int req_byte2 = -1;  
5967  int newline;  int newline;
5968  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
5969  BOOL anchored;  BOOL anchored;
5970  BOOL startline;  BOOL startline;
5971  BOOL firstline;  BOOL firstline;
 BOOL first_byte_caseless = FALSE;  
 BOOL req_byte_caseless = FALSE;  
5972  BOOL utf8;  BOOL utf8;
5973    BOOL has_first_char = FALSE;
5974    BOOL has_req_char = FALSE;
5975    pcre_uchar first_char = 0;
5976    pcre_uchar first_char2 = 0;
5977    pcre_uchar req_char = 0;
5978    pcre_uchar req_char2 = 0;
5979  match_data match_block;  match_data match_block;
5980  match_data *md = &match_block;  match_data *md = &match_block;
5981  const pcre_uint8 *tables;  const pcre_uint8 *tables;
# Line 5982  const pcre_uint8 *start_bits = NULL; Line 5983  const pcre_uint8 *start_bits = NULL;
5983  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
5984  PCRE_PUCHAR end_subject;  PCRE_PUCHAR end_subject;
5985  PCRE_PUCHAR start_partial = NULL;  PCRE_PUCHAR start_partial = NULL;
5986  PCRE_PUCHAR req_byte_ptr = start_match - 1;  PCRE_PUCHAR req_char_ptr = start_match - 1;
5987    
5988  pcre_study_data internal_study;  pcre_study_data internal_study;
5989  const pcre_study_data *study;  const pcre_study_data *study;
# Line 6252  if (md->offset_vector != NULL) Line 6253  if (md->offset_vector != NULL)
6253    md->offset_vector[0] = md->offset_vector[1] = -1;    md->offset_vector[0] = md->offset_vector[1] = -1;
6254    }    }
6255    
6256  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_char value is
6257  never set for an anchored regular expression, but the anchoring may be forced  never set for an anchored regular expression, but the anchoring may be forced
6258  at run time, so we have to test for anchoring. The first char may be unset for  at run time, so we have to test for anchoring. The first char may be unset for
6259  an unanchored pattern, of course. If there's no first char and the pattern was  an unanchored pattern, of course. If there's no first char and the pattern was
# Line 6262  if (!anchored) Line 6263  if (!anchored)
6263    {    {
6264    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
6265      {      {
6266      first_byte = re->first_byte & 255;      has_first_char = TRUE;
6267      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      first_char = first_char2 = re->first_char;
6268        first_byte = md->lcc[first_byte];      if ((re->flags & PCRE_FCH_CASELESS) != 0)
6269          first_char2 = TABLE_GET(first_char, tables + fcc_offset, first_char);
6270      }      }
6271    else    else
6272      if (!startline && study != NULL &&      if (!startline && study != NULL &&
# Line 6277  character" set. */ Line 6279  character" set. */
6279    
6280  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
6281    {    {
6282    req_byte = re->req_byte & 255;    has_req_char = TRUE;
6283    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_char = req_char2 = re->req_char;
6284    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */    if ((re->flags & PCRE_RCH_CASELESS) != 0)
6285        req_char2 = TABLE_GET(req_char, tables + fcc_offset, req_char);
6286    }    }
6287    
6288    
   
   
6289  /* ==========================================================================*/  /* ==========================================================================*/
6290    
6291  /* Loop for handling unanchored repeated matching attempts; for anchored regexs  /* Loop for handling unanchored repeated matching attempts; for anchored regexs
# Line 6327  for(;;) Line 6328  for(;;)
6328    
6329    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
6330      {      {
6331      /* Advance to a unique first byte if there is one. */      /* Advance to a unique first char if there is one. */
6332    
6333      if (first_byte >= 0)      if (has_first_char)
6334        {        {
6335        if (first_byte_caseless)        if (first_char != first_char2)
6336          while (start_match < end_subject && md->lcc[*start_match] != first_byte)          while (start_match < end_subject &&
6337                *start_match != first_char && *start_match != first_char2)
6338            start_match++;            start_match++;
6339        else        else
6340          while (start_match < end_subject && *start_match != first_byte)          while (start_match < end_subject && *start_match != first_char)
6341            start_match++;            start_match++;
6342        }        }
6343    
# Line 6418  for(;;) Line 6420  for(;;)
6420        break;        break;
6421        }        }
6422    
6423      /* If req_byte is set, we know that that character must appear in the      /* If req_char is set, we know that that character must appear in the
6424      subject for the match to succeed. If the first character is set, req_byte      subject for the match to succeed. If the first character is set, req_char
6425      must be later in the subject; otherwise the test starts at the match point.      must be later in the subject; otherwise the test starts at the match point.
6426      This optimization can save a huge amount of backtracking in patterns with      This optimization can save a huge amount of backtracking in patterns with
6427      nested unlimited repeats that aren't going to match. Writing separate code      nested unlimited repeats that aren't going to match. Writing separate code
# Line 6432  for(;;) Line 6434  for(;;)
6434      32-megabyte string... so we don't do this when the string is sufficiently      32-megabyte string... so we don't do this when the string is sufficiently
6435      long. */      long. */
6436    
6437      if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)      if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
6438        {        {
6439        register PCRE_PUCHAR p = start_match + ((first_byte >= 0)? 1 : 0);        register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
6440    
6441        /* We don't need to repeat the search if we haven't yet reached the        /* We don't need to repeat the search if we haven't yet reached the
6442        place we found it at last time. */        place we found it at last time. */
6443    
6444        if (p > req_byte_ptr)        if (p > req_char_ptr)
6445          {          {
6446          if (req_byte_caseless)          if (req_char != req_char2)
6447            {            {
6448            while (p < end_subject)            while (p < end_subject)
6449              {              {
6450              register int pp = *p++;              register int pp = *p++;
6451              if (pp == req_byte || pp == req_byte2) { p--; break; }              if (pp == req_char || pp == req_char2) { p--; break; }
6452              }              }
6453            }            }
6454          else          else
6455            {            {
6456            while (p < end_subject)            while (p < end_subject)
6457              {              {
6458              if (*p++ == req_byte) { p--; break; }              if (*p++ == req_char) { p--; break; }
6459              }              }
6460            }            }
6461    
# Line 6470  for(;;) Line 6472  for(;;)
6472          found it, so that we don't search again next time round the loop if          found it, so that we don't search again next time round the loop if
6473          the start hasn't passed this character yet. */          the start hasn't passed this character yet. */
6474    
6475          req_byte_ptr = p;          req_char_ptr = p;
6476          }          }
6477        }        }
6478      }      }

Legend:
Removed from v.770  
changed lines
  Added in v.774

  ViewVC Help
Powered by ViewVC 1.1.5