/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 383 by ph10, Sun Mar 8 15:26:59 2009 UTC | revision 389 by ph10, Sun Mar 15 18:24:05 2009 UTC
# Line 2714  if ((re->flags & PCRE_REQCHSET) != 0) Line 2714  if ((re->flags & PCRE_REQCHSET) != 0)
2714    }    }
2715    
2716  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
2717  failed match. Unless restarting, optimize by moving to the first match  failed match. If not restarting, perform certain optimizations at the start of
2718  character if possible, when not anchored. Then unless wanting a partial match,  a match. */
 check for a required later character. */  
2719    
2720  for (;;)  for (;;)
2721    {    {
# Line 2726  for (;;) Line 2725  for (;;)
2725      {      {
2726      const uschar *save_end_subject = end_subject;      const uschar *save_end_subject = end_subject;
2727    
2728      /* Advance to a unique first char if possible. If firstline is TRUE, the      /* If firstline is TRUE, the start of the match is constrained to the first
2729      start of the match is constrained to the first line of a multiline string.      line of a multiline string. Implement this by temporarily adjusting
2730      Implement this by temporarily adjusting end_subject so that we stop      end_subject so that we stop scanning at a newline. If the match fails at
2731      scanning at a newline. If the match fails at the newline, later code breaks      the newline, later code breaks this loop. */
     this loop. */  
2732    
2733      if (firstline)      if (firstline)
2734        {        {
# Line 2749  for (;;) Line 2747  for (;;)
2747        while (t < md->end_subject && !IS_NEWLINE(t)) t++;        while (t < md->end_subject && !IS_NEWLINE(t)) t++;
2748        end_subject = t;        end_subject = t;
2749        }        }
2750    
2751      if (first_byte >= 0)      /* There are some optimizations that avoid running the match if a known
2752        {      starting point is not found, or if a known later character is not present.
2753        if (first_byte_caseless)      However, there is an option that disables these, for testing and for
2754          while (current_subject < end_subject &&      ensuring that all callouts do actually occur. */
2755                 lcc[*current_subject] != first_byte)  
2756            current_subject++;      if ((options & PCRE_NO_START_OPTIMIZE) == 0)
2757        else        {
2758          while (current_subject < end_subject && *current_subject != first_byte)  
2759            current_subject++;        /* Advance to a known first byte. */
2760        }  
2761          if (first_byte >= 0)
     /* Or to just after a linebreak for a multiline match if possible */  
   
     else if (startline)  
       {  
       if (current_subject > md->start_subject + start_offset)  
2762          {          {
2763  #ifdef SUPPORT_UTF8          if (first_byte_caseless)
2764          if (utf8)            while (current_subject < end_subject &&
2765                     lcc[*current_subject] != first_byte)
2766                current_subject++;
2767            else
2768              while (current_subject < end_subject &&
2769                     *current_subject != first_byte)
2770                current_subject++;
2771            }
2772    
2773          /* Or to just after a linebreak for a multiline match if possible */
2774    
2775          else if (startline)
2776            {
2777            if (current_subject > md->start_subject + start_offset)
2778            {            {
2779            while (current_subject < end_subject && !WAS_NEWLINE(current_subject))  #ifdef SUPPORT_UTF8
2780              if (utf8)
2781              {              {
2782              current_subject++;              while (current_subject < end_subject &&
2783              while(current_subject < end_subject &&                     !WAS_NEWLINE(current_subject))
2784                    (*current_subject & 0xc0) == 0x80)                {
2785                current_subject++;                current_subject++;
2786                  while(current_subject < end_subject &&
2787                        (*current_subject & 0xc0) == 0x80)
2788                    current_subject++;
2789                  }
2790              }              }
2791            }            else
         else  
2792  #endif  #endif
2793          while (current_subject < end_subject && !WAS_NEWLINE(current_subject))            while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
2794            current_subject++;              current_subject++;
2795    
2796          /* If we have just passed a CR and the newline option is ANY or            /* If we have just passed a CR and the newline option is ANY or
2797          ANYCRLF, and we are now at a LF, advance the match position by one more            ANYCRLF, and we are now at a LF, advance the match position by one
2798          character. */            more character. */
2799    
2800          if (current_subject[-1] == '\r' &&            if (current_subject[-1] == '\r' &&
2801               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
2802               current_subject < end_subject &&                 current_subject < end_subject &&
2803               *current_subject == '\n')                 *current_subject == '\n')
2804            current_subject++;              current_subject++;
2805              }
2806          }          }
2807        }  
2808          /* Or to a non-unique first char after study */
2809      /* Or to a non-unique first char after study */  
2810          else if (start_bits != NULL)
     else if (start_bits != NULL)  
       {  
       while (current_subject < end_subject)  
2811          {          {
2812          register unsigned int c = *current_subject;          while (current_subject < end_subject)
2813          if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;            {
2814            else break;            register unsigned int c = *current_subject;
2815              if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;
2816                else break;
2817              }
2818          }          }
2819        }        }
2820    
2821      /* Restore fudged end_subject */      /* Restore fudged end_subject */
2822    
# Line 2825  for (;;) Line 2836  for (;;)
2836    showed up when somebody was matching /^C/ on a 32-megabyte string... so we    showed up when somebody was matching /^C/ on a 32-megabyte string... so we
2837    don't do this when the string is sufficiently long.    don't do this when the string is sufficiently long.
2838    
2839    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested, and can
2840    */    also be explicitly deactivated. */
2841    
2842    if (req_byte >= 0 &&    if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
2843          req_byte >= 0 &&
2844        end_subject - current_subject < REQ_BYTE_MAX &&        end_subject - current_subject < REQ_BYTE_MAX &&
2845        (options & PCRE_PARTIAL) == 0)        (options & PCRE_PARTIAL) == 0)
2846      {      {

Legend:
Removed from v.383  
changed lines
  Added in v.389

  ViewVC Help
Powered by ViewVC 1.1.5