/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 365 by ph10, Fri Jul 11 17:06:55 2008 UTC | revision 389 by ph10, Sun Mar 15 18:24:05 2009 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* PCRE is a library of functions to support regular expressions whose syntax  /* PCRE is a library of functions to support regular expressions whose syntax
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language (but see
7    below for why this module is different).
8    
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2008 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
11    
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 757  for (;;) Line 758  for (;;)
758        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
759          {          {
760          if (clen == 0 ||          if (clen == 0 ||
761              (IS_NEWLINE(ptr) &&              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
762                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
763              ))              ))
764            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
# Line 2713  if ((re->flags & PCRE_REQCHSET) != 0) Line 2714  if ((re->flags & PCRE_REQCHSET) != 0)
2714    }    }
2715    
2716  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
2717  failed match. Unless restarting, optimize by moving to the first match  failed match. If not restarting, perform certain optimizations at the start of
2718  character if possible, when not anchored. Then unless wanting a partial match,  a match. */
 check for a required later character. */  
2719    
2720  for (;;)  for (;;)
2721    {    {
# Line 2725  for (;;) Line 2725  for (;;)
2725      {      {
2726      const uschar *save_end_subject = end_subject;      const uschar *save_end_subject = end_subject;
2727    
2728      /* Advance to a unique first char if possible. If firstline is TRUE, the      /* If firstline is TRUE, the start of the match is constrained to the first
2729      start of the match is constrained to the first line of a multiline string.      line of a multiline string. Implement this by temporarily adjusting
2730      Implement this by temporarily adjusting end_subject so that we stop      end_subject so that we stop scanning at a newline. If the match fails at
2731      scanning at a newline. If the match fails at the newline, later code breaks      the newline, later code breaks this loop. */
     this loop. */  
2732    
2733      if (firstline)      if (firstline)
2734        {        {
2735        USPTR t = current_subject;        USPTR t = current_subject;
2736  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2737        if (utf8)        if (utf8)
2738          {          {
2739          while (t < md->end_subject && !IS_NEWLINE(t))          while (t < md->end_subject && !IS_NEWLINE(t))
2740            {            {
2741            t++;            t++;
2742            while (t < end_subject && (*t & 0xc0) == 0x80) t++;            while (t < end_subject && (*t & 0xc0) == 0x80) t++;
2743            }            }
2744          }          }
2745        else        else
2746  #endif  #endif
2747        while (t < md->end_subject && !IS_NEWLINE(t)) t++;        while (t < md->end_subject && !IS_NEWLINE(t)) t++;
2748        end_subject = t;        end_subject = t;
2749        }        }
2750    
2751      if (first_byte >= 0)      /* There are some optimizations that avoid running the match if a known
2752        {      starting point is not found, or if a known later character is not present.
2753        if (first_byte_caseless)      However, there is an option that disables these, for testing and for
2754          while (current_subject < end_subject &&      ensuring that all callouts do actually occur. */
2755                 lcc[*current_subject] != first_byte)  
2756            current_subject++;      if ((options & PCRE_NO_START_OPTIMIZE) == 0)
2757        else        {
2758          while (current_subject < end_subject && *current_subject != first_byte)  
2759            current_subject++;        /* Advance to a known first byte. */
2760        }  
2761          if (first_byte >= 0)
2762      /* Or to just after a linebreak for a multiline match if possible */          {
2763            if (first_byte_caseless)
2764      else if (startline)            while (current_subject < end_subject &&
2765        {                   lcc[*current_subject] != first_byte)
2766        if (current_subject > md->start_subject + start_offset)              current_subject++;
2767            else
2768              while (current_subject < end_subject &&
2769                     *current_subject != first_byte)
2770                current_subject++;
2771            }
2772    
2773          /* Or to just after a linebreak for a multiline match if possible */
2774    
2775          else if (startline)
2776          {          {
2777  #ifdef SUPPORT_UTF8          if (current_subject > md->start_subject + start_offset)
         if (utf8)  
2778            {            {
2779            while (current_subject < end_subject && !WAS_NEWLINE(current_subject))  #ifdef SUPPORT_UTF8
2780              if (utf8)
2781              {              {
2782              current_subject++;              while (current_subject < end_subject &&
2783              while(current_subject < end_subject &&                     !WAS_NEWLINE(current_subject))
2784                    (*current_subject & 0xc0) == 0x80)                {
2785                current_subject++;                current_subject++;
2786              }                while(current_subject < end_subject &&
2787                        (*current_subject & 0xc0) == 0x80)
2788                    current_subject++;
2789                  }
2790                }
2791              else
2792    #endif
2793              while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
2794                current_subject++;
2795    
2796              /* If we have just passed a CR and the newline option is ANY or
2797              ANYCRLF, and we are now at a LF, advance the match position by one
2798              more character. */
2799    
2800              if (current_subject[-1] == '\r' &&
2801                   (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
2802                   current_subject < end_subject &&
2803                   *current_subject == '\n')
2804                current_subject++;
2805              }
2806            }
2807    
2808          /* Or to a non-unique first char after study */
2809    
2810          else if (start_bits != NULL)
2811            {
2812            while (current_subject < end_subject)
2813              {
2814              register unsigned int c = *current_subject;
2815              if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;
2816                else break;
2817            }            }
         else  
 #endif  
         while (current_subject < end_subject && !WAS_NEWLINE(current_subject))  
           current_subject++;  
   
         /* If we have just passed a CR and the newline option is ANY or  
         ANYCRLF, and we are now at a LF, advance the match position by one more  
         character. */  
   
         if (current_subject[-1] == '\r' &&  
              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&  
              current_subject < end_subject &&  
              *current_subject == '\n')  
           current_subject++;  
2818          }          }
2819        }        }
   
     /* Or to a non-unique first char after study */  
   
     else if (start_bits != NULL)  
       {  
       while (current_subject < end_subject)  
         {  
         register unsigned int c = *current_subject;  
         if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;  
           else break;  
         }  
       }  
2820    
2821      /* Restore fudged end_subject */      /* Restore fudged end_subject */
2822    
# Line 2824  for (;;) Line 2836  for (;;)
2836    showed up when somebody was matching /^C/ on a 32-megabyte string... so we    showed up when somebody was matching /^C/ on a 32-megabyte string... so we
2837    don't do this when the string is sufficiently long.    don't do this when the string is sufficiently long.
2838    
2839    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested, and can
2840    */    also be explicitly deactivated. */
2841    
2842    if (req_byte >= 0 &&    if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
2843          req_byte >= 0 &&
2844        end_subject - current_subject < REQ_BYTE_MAX &&        end_subject - current_subject < REQ_BYTE_MAX &&
2845        (options & PCRE_PARTIAL) == 0)        (options & PCRE_PARTIAL) == 0)
2846      {      {

Legend:
Removed from v.365  
changed lines
  Added in v.389

  ViewVC Help
Powered by ViewVC 1.1.5