/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 141 by ph10, Fri Mar 30 15:46:27 2007 UTC revision 146 by ph10, Thu Apr 5 09:17:28 2007 UTC
# Line 67  input mode under Windows. */ Line 67  input mode under Windows. */
67  #endif  #endif
68    
69    
70  #define PCRE_SPY        /* For Win32 build, import data, not export */  /* We have to include pcre_internal.h because we need the internal info for
71    displaying the results of pcre_study() and we also need to know about the
72  /* We include pcre_internal.h because we need the internal info for displaying  internal macros, structures, and other internal data values; pcretest has
73  the results of pcre_study() and we also need to know about the internal  "inside information" compared to a program that strictly follows the PCRE API.
74  macros, structures, and other internal data values; pcretest has "inside  
75  information" compared to a program that strictly follows the PCRE API. */  Although pcre_internal.h does itself include pcre.h, we explicitly include it
76    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
77    appropriately for an application, not for building PCRE. */
78    
79    #include "pcre.h"
80  #include "pcre_internal.h"  #include "pcre_internal.h"
81    
82  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to the data tables that PCRE uses. So as not to have to keep
# Line 1972  while (!done) Line 1975  while (!done)
1975    
1976      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
1977        {        {
       int gany_fudge;  
1978        if (timeitm > 0)        if (timeitm > 0)
1979          {          {
1980          register int i;          register int i;
# Line 2212  while (!done) Line 2214  while (!done)
2214          }          }
2215    
2216        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2217        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2218        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2219        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2220        offset values to achieve this. We won't be at the end of the string -  
2221        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any".
2222          If the previous match was at the end of a line terminated by CRLF, an
2223          advance of one character just passes the \r, whereas we should prefer the
2224          longer newline sequence, as does the code in pcre_exec(). Fudge the
2225          offset value to achieve this.
2226    
2227          Otherwise, in the case of UTF-8 matching, the advance must be one
2228          character, not one byte. */
2229    
2230        else        else
2231          {          {
2232          if (g_notempty != 0)          if (g_notempty != 0)
2233            {            {
2234            int onechar = 1;            int onechar = 1;
2235              unsigned int obits = ((real_pcre *)re)->options;
2236            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2237            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2238                {
2239                int d;
2240                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2241                obits = (d == '\r')? PCRE_NEWLINE_CR :
2242                        (d == '\n')? PCRE_NEWLINE_LF :
2243                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2244                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2245                }
2246              if ((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY &&
2247                  start_offset < len - 1 &&
2248                  bptr[start_offset] == '\r' &&
2249                  bptr[start_offset+1] == '\n')
2250                onechar++;
2251              else if (use_utf8)
2252              {              {
2253              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2254                {                {
# Line 2256  while (!done) Line 2280  while (!done)
2280        what Perl's /g option does. This turns out to be rather cunning. First        what Perl's /g option does. This turns out to be rather cunning. First
2281        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2282        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
2283        character.        character. */
   
       Yet more complication arises in the case when the newline option is  
       "any" and a pattern in multiline mode has to match at the start of a  
       line. If a previous match was at the end of a line, an advance of one  
       character just passes the \r, whereas we should prefer the longer newline  
       sequence, as does the code in pcre_exec(). So we fudge it. */  
2284    
2285        g_notempty = 0;        g_notempty = 0;
2286        gany_fudge = 0;  
   
2287        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2288          {          {
2289          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
2290          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
         if ((((real_pcre *)re)->options & PCRE_STARTLINE) != 0 &&  
             (((real_pcre *)re)->options & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY &&  
             use_offsets[0] < len - 1 &&  
             bptr[use_offsets[0]] == '\r' &&  
             bptr[use_offsets[0]+1] == '\n')  
           gany_fudge = 1;  
2291          }          }
2292    
2293        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
2294    
2295        if (do_g) start_offset = use_offsets[1] + gany_fudge;        if (do_g) start_offset = use_offsets[1];
2296    
2297        /* For /G, update the pointer and length */        /* For /G, update the pointer and length */
2298    
2299        else        else
2300          {          {
2301          bptr += use_offsets[1] + gany_fudge;          bptr += use_offsets[1];
2302          len -= use_offsets[1] + gany_fudge;          len -= use_offsets[1];
2303          }          }
2304        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2305    

Legend:
Removed from v.141  
changed lines
  Added in v.146

  ViewVC Help
Powered by ViewVC 1.1.5