/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 442 by ph10, Fri Sep 11 10:21:02 2009 UTC revision 455 by ph10, Sat Sep 26 19:12:32 2009 UTC
# Line 2651  if (extra_data != NULL) Line 2651  if (extra_data != NULL)
2651    if ((flags & PCRE_EXTRA_TABLES) != 0)    if ((flags & PCRE_EXTRA_TABLES) != 0)
2652      md->tables = extra_data->tables;      md->tables = extra_data->tables;
2653    }    }
2654    
2655  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
2656  test for a regex that was compiled on a host of opposite endianness. If this is  test for a regex that was compiled on a host of opposite endianness. If this is
2657  the case, flipped values are put in internal_re and internal_study if there was  the case, flipped values are put in internal_re and internal_study if there was
# Line 2790  if (!anchored) Line 2790  if (!anchored)
2790      }      }
2791    else    else
2792      {      {
2793      if (startline && study != NULL &&      if (!startline && study != NULL &&
2794           (study->options & PCRE_STUDY_MAPPED) != 0)           (study->flags & PCRE_STUDY_MAPPED) != 0)
2795        start_bits = study->start_bits;        start_bits = study->start_bits;
2796      }      }
2797    }    }
# Line 2842  for (;;) Line 2842  for (;;)
2842        }        }
2843    
2844      /* There are some optimizations that avoid running the match if a known      /* There are some optimizations that avoid running the match if a known
2845      starting point is not found, or if a known later character is not present.      starting point is not found. However, there is an option that disables
2846      However, there is an option that disables these, for testing and for      these, for testing and for ensuring that all callouts do actually occur. */
     ensuring that all callouts do actually occur. */  
2847    
2848      if ((options & PCRE_NO_START_OPTIMIZE) == 0)      if ((options & PCRE_NO_START_OPTIMIZE) == 0)
2849        {        {
   
2850        /* Advance to a known first byte. */        /* Advance to a known first byte. */
2851    
2852        if (first_byte >= 0)        if (first_byte >= 0)
# Line 2914  for (;;) Line 2912  for (;;)
2912      /* Restore fudged end_subject */      /* Restore fudged end_subject */
2913    
2914      end_subject = save_end_subject;      end_subject = save_end_subject;
     }  
2915    
2916    /* If req_byte is set, we know that that character must appear in the subject      /* The following two optimizations are disabled for partial matching or if
2917    for the match to succeed. If the first character is set, req_byte must be      disabling is explicitly requested (and of course, by the test above, this
2918    later in the subject; otherwise the test starts at the match point. This      code is not obeyed when restarting after a partial match). */
2919    optimization can save a huge amount of work in patterns with nested unlimited  
2920    repeats that aren't going to match. Writing separate code for cased/caseless      if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
2921    versions makes it go faster, as does using an autoincrement and backing off          (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
2922    on a match.        {
2923          /* If the pattern was studied, a minimum subject length may be set. This
2924    HOWEVER: when the subject string is very, very long, searching to its end can        is a lower bound; no actual string of that length may actually match the
2925    take a long time, and give bad performance on quite ordinary patterns. This        pattern. Although the value is, strictly, in characters, we treat it as
2926    showed up when somebody was matching /^C/ on a 32-megabyte string... so we        bytes to avoid spending too much time in this optimization. */
2927    don't do this when the string is sufficiently long.  
2928          if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
2929    ALSO: this processing is disabled when partial matching is requested, and can            end_subject - current_subject < study->minlength)
2930    also be explicitly deactivated. Furthermore, we have to disable when          return PCRE_ERROR_NOMATCH;
2931    restarting after a partial match, because the required character may have  
2932    already been matched. */        /* If req_byte is set, we know that that character must appear in the
2933          subject for the match to succeed. If the first character is set, req_byte
2934    if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&        must be later in the subject; otherwise the test starts at the match
2935        req_byte >= 0 &&        point. This optimization can save a huge amount of work in patterns with
2936        end_subject - current_subject < REQ_BYTE_MAX &&        nested unlimited repeats that aren't going to match. Writing separate
2937        (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_DFA_RESTART)) == 0)        code for cased/caseless versions makes it go faster, as does using an
2938      {        autoincrement and backing off on a match.
2939      register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);  
2940          HOWEVER: when the subject string is very, very long, searching to its end
2941      /* We don't need to repeat the search if we haven't yet reached the        can take a long time, and give bad performance on quite ordinary
2942      place we found it at last time. */        patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
2943          string... so we don't do this when the string is sufficiently long. */
2944      if (p > req_byte_ptr)  
2945        {        if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)
       if (req_byte_caseless)  
2946          {          {
2947          while (p < end_subject)          register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);
2948    
2949            /* We don't need to repeat the search if we haven't yet reached the
2950            place we found it at last time. */
2951    
2952            if (p > req_byte_ptr)
2953            {            {
2954            register int pp = *p++;            if (req_byte_caseless)
2955            if (pp == req_byte || pp == req_byte2) { p--; break; }              {
2956            }              while (p < end_subject)
2957          }                {
2958        else                register int pp = *p++;
2959          {                if (pp == req_byte || pp == req_byte2) { p--; break; }
2960          while (p < end_subject)                }
2961            {              }
2962            if (*p++ == req_byte) { p--; break; }            else
2963                {
2964                while (p < end_subject)
2965                  {
2966                  if (*p++ == req_byte) { p--; break; }
2967                  }
2968                }
2969    
2970              /* If we can't find the required character, break the matching loop,
2971              which will cause a return of PCRE_ERROR_NOMATCH. */
2972    
2973              if (p >= end_subject) break;
2974    
2975              /* If we have found the required character, save the point where we
2976              found it, so that we don't search again next time round the loop if
2977              the start hasn't passed this character yet. */
2978    
2979              req_byte_ptr = p;
2980            }            }
2981          }          }
   
       /* If we can't find the required character, break the matching loop,  
       which will cause a return of PCRE_ERROR_NOMATCH. */  
   
       if (p >= end_subject) break;  
   
       /* If we have found the required character, save the point where we  
       found it, so that we don't search again next time round the loop if  
       the start hasn't passed this character yet. */  
   
       req_byte_ptr = p;  
2982        }        }
2983      }      }   /* End of optimizations that are done when not restarting */
2984    
2985    /* OK, now we can do the business */    /* OK, now we can do the business */
2986    

Legend:
Removed from v.442  
changed lines
  Added in v.455

  ViewVC Help
Powered by ViewVC 1.1.5