/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1425 by ph10, Tue Dec 31 17:44:40 2013 UTC revision 1430 by ph10, Wed Jan 1 17:11:54 2014 UTC
# Line 3466  for (;;) Line 3466  for (;;)
3466    
3467      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
3468        {        {
3469        /* Advance to a known first char. */        /* Advance to a known first pcre_uchar (i.e. data item) */
3470    
3471        if (has_first_char)        if (has_first_char)
3472          {          {
# Line 3516  for (;;) Line 3516  for (;;)
3516            }            }
3517          }          }
3518    
3519        /* Or to a non-unique first char after study */        /* Advance to a non-unique first pcre_uchar after study */
3520    
3521        else if (start_bits != NULL)        else if (start_bits != NULL)
3522          {          {
# Line 3526  for (;;) Line 3526  for (;;)
3526  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
3527            if (c > 255) c = 255;            if (c > 255) c = 255;
3528  #endif  #endif
3529            if ((start_bits[c/8] & (1 << (c&7))) == 0)            if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
3530              {            current_subject++;
             current_subject++;  
 #if defined SUPPORT_UTF && defined COMPILE_PCRE8  
             /* In non 8-bit mode, the iteration will stop for  
             characters > 255 at the beginning or not stop at all. */  
             if (utf)  
               ACROSSCHAR(current_subject < end_subject, *current_subject,  
                 current_subject++);  
 #endif  
             }  
           else break;  
3531            }            }
3532          }          }
3533        }        }
# Line 3556  for (;;) Line 3546  for (;;)
3546        /* If the pattern was studied, a minimum subject length may be set. This        /* If the pattern was studied, a minimum subject length may be set. This
3547        is a lower bound; no actual string of that length may actually match the        is a lower bound; no actual string of that length may actually match the
3548        pattern. Although the value is, strictly, in characters, we treat it as        pattern. Although the value is, strictly, in characters, we treat it as
3549        bytes to avoid spending too much time in this optimization. */        in pcre_uchar units to avoid spending too much time in this optimization.
3550          */
3551    
3552        if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&        if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
3553            (pcre_uint32)(end_subject - current_subject) < study->minlength)            (pcre_uint32)(end_subject - current_subject) < study->minlength)
3554          return PCRE_ERROR_NOMATCH;          return PCRE_ERROR_NOMATCH;
3555    
3556        /* If req_char is set, we know that that character must appear in the        /* If req_char is set, we know that that pcre_uchar must appear in the
3557        subject for the match to succeed. If the first character is set, req_char        subject for the match to succeed. If the first pcre_uchar is set,
3558        must be later in the subject; otherwise the test starts at the match        req_char must be later in the subject; otherwise the test starts at the
3559        point. This optimization can save a huge amount of work in patterns with        match point. This optimization can save a huge amount of work in patterns
3560        nested unlimited repeats that aren't going to match. Writing separate        with nested unlimited repeats that aren't going to match. Writing
3561        code for cased/caseless versions makes it go faster, as does using an        separate code for cased/caseless versions makes it go faster, as does
3562        autoincrement and backing off on a match.        using an autoincrement and backing off on a match.
3563    
3564        HOWEVER: when the subject string is very, very long, searching to its end        HOWEVER: when the subject string is very, very long, searching to its end
3565        can take a long time, and give bad performance on quite ordinary        can take a long time, and give bad performance on quite ordinary
# Line 3600  for (;;) Line 3591  for (;;)
3591                }                }
3592              }              }
3593    
3594            /* If we can't find the required character, break the matching loop,            /* If we can't find the required pcre_uchar, break the matching loop,
3595            which will cause a return of PCRE_ERROR_NOMATCH. */            which will cause a return of PCRE_ERROR_NOMATCH. */
3596    
3597            if (p >= end_subject) break;            if (p >= end_subject) break;
3598    
3599            /* If we have found the required character, save the point where we            /* If we have found the required pcre_uchar, save the point where we
3600            found it, so that we don't search again next time round the loop if            found it, so that we don't search again next time round the loop if
3601            the start hasn't passed this character yet. */            the start hasn't passed this point yet. */
3602    
3603            req_char_ptr = p;            req_char_ptr = p;
3604            }            }

Legend:
Removed from v.1425  
changed lines
  Added in v.1430

  ViewVC Help
Powered by ViewVC 1.1.5