/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 142 by ph10, Fri Mar 30 15:55:18 2007 UTC revision 227 by ph10, Tue Aug 21 15:00:15 2007 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include <config.h>
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 67  input mode under Windows. */ Line 71  input mode under Windows. */
71  #endif  #endif
72    
73    
74  #define PCRE_SPY        /* For Win32 build, import data, not export */  /* We have to include pcre_internal.h because we need the internal info for
75    displaying the results of pcre_study() and we also need to know about the
76  /* We include pcre_internal.h because we need the internal info for displaying  internal macros, structures, and other internal data values; pcretest has
77  the results of pcre_study() and we also need to know about the internal  "inside information" compared to a program that strictly follows the PCRE API.
78  macros, structures, and other internal data values; pcretest has "inside  
79  information" compared to a program that strictly follows the PCRE API. */  Although pcre_internal.h does itself include pcre.h, we explicitly include it
80    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81    appropriately for an application, not for building PCRE. */
82    
83    #include "pcre.h"
84  #include "pcre_internal.h"  #include "pcre_internal.h"
85    
86  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to the data tables that PCRE uses. So as not to have to keep
# Line 149  static int callout_count; Line 156  static int callout_count;
156  static int callout_extra;  static int callout_extra;
157  static int callout_fail_count;  static int callout_fail_count;
158  static int callout_fail_id;  static int callout_fail_id;
159    static int debug_lengths;
160  static int first_callout;  static int first_callout;
161  static int locale_set = 0;  static int locale_set = 0;
162  static int show_malloc;  static int show_malloc;
# Line 656  return count; Line 664  return count;
664    
665    
666  /*************************************************  /*************************************************
667    *         Case-independent strncmp() function    *
668    *************************************************/
669    
670    /*
671    Arguments:
672      s         first string
673      t         second string
674      n         number of characters to compare
675    
676    Returns:    < 0, = 0, or > 0, according to the comparison
677    */
678    
679    static int
680    strncmpic(uschar *s, uschar *t, int n)
681    {
682    while (n--)
683      {
684      int c = tolower(*s++) - tolower(*t++);
685      if (c) return c;
686      }
687    return 0;
688    }
689    
690    
691    
692    /*************************************************
693  *         Check newline indicator                *  *         Check newline indicator                *
694  *************************************************/  *************************************************/
695    
696  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes, where
697  xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
698    no match.
699    
700  Arguments:  Arguments:
701    p           points after the leading '<'    p           points after the leading '<'
# Line 672  Returns:      appropriate PCRE_NEWLINE_x Line 707  Returns:      appropriate PCRE_NEWLINE_x
707  static int  static int
708  check_newline(uschar *p, FILE *f)  check_newline(uschar *p, FILE *f)
709  {  {
710  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
711  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
712  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
713  if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
714    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
715  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
716  return 0;  return 0;
717  }  }
# Line 847  while (argc > 1 && argv[op][0] == '-') Line 883  while (argc > 1 && argv[op][0] == '-')
883      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
884      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
885        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
886          (rc == -2)? "ANYCRLF" :
887        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
888      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
889      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
# Line 884  offsets = (int *)malloc(size_offsets_max Line 921  offsets = (int *)malloc(size_offsets_max
921  if (offsets == NULL)  if (offsets == NULL)
922    {    {
923    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
924      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
925    yield = 1;    yield = 1;
926    goto EXIT;    goto EXIT;
927    }    }
# Line 944  while (!done) Line 981  while (!done)
981    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
982    int do_study = 0;    int do_study = 0;
983    int do_debug = debug;    int do_debug = debug;
   int debug_lengths = 1;  
984    int do_G = 0;    int do_G = 0;
985    int do_g = 0;    int do_g = 0;
986    int do_showinfo = showinfo;    int do_showinfo = showinfo;
# Line 953  while (!done) Line 989  while (!done)
989    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
990    
991    use_utf8 = 0;    use_utf8 = 0;
992      debug_lengths = 1;
993    
994    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
995    if (extend_inputline(infile, buffer) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
# Line 1346  while (!done) Line 1383  while (!done)
1383  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1384        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1385  #endif  #endif
1386        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char, okpartial, jchanged,
1387            hascrorlf;
1388        int nameentrysize, namecount;        int nameentrysize, namecount;
1389        const uschar *nametable;        const uschar *nametable;
1390    
# Line 1359  while (!done) Line 1397  while (!done)
1397        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1398        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1399        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1400          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1401          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1402          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1403    
1404  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1405        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
# Line 1400  while (!done) Line 1441  while (!done)
1441            }            }
1442          }          }
1443    
1444        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1445        to fish it out via out back door */        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1446    
1447        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1448        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
          }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
1449    
1450        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1451          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
# Line 1428  while (!done) Line 1463  while (!done)
1463            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1464            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1465    
1466          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1467    
1468        switch (get_options & PCRE_NEWLINE_BITS)        switch (get_options & PCRE_NEWLINE_BITS)
1469          {          {
1470          case PCRE_NEWLINE_CR:          case PCRE_NEWLINE_CR:
# Line 1442  while (!done) Line 1479  while (!done)
1479          fprintf(outfile, "Forced newline sequence: CRLF\n");          fprintf(outfile, "Forced newline sequence: CRLF\n");
1480          break;          break;
1481    
1482            case PCRE_NEWLINE_ANYCRLF:
1483            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1484            break;
1485    
1486          case PCRE_NEWLINE_ANY:          case PCRE_NEWLINE_ANY:
1487          fprintf(outfile, "Forced newline sequence: ANY\n");          fprintf(outfile, "Forced newline sequence: ANY\n");
1488          break;          break;
# Line 1591  while (!done) Line 1632  while (!done)
1632    for (;;)    for (;;)
1633      {      {
1634      uschar *q;      uschar *q;
1635      uschar *bptr = dbuffer;      uschar *bptr;
1636      int *use_offsets = offsets;      int *use_offsets = offsets;
1637      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1638      int callout_data = 0;      int callout_data = 0;
# Line 1647  while (!done) Line 1688  while (!done)
1688      p = buffer;      p = buffer;
1689      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1690    
1691      q = dbuffer;      bptr = q = dbuffer;
1692      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1693        {        {
1694        int i = 0;        int i = 0;
# Line 1842  while (!done) Line 1883  while (!done)
1883            if (offsets == NULL)            if (offsets == NULL)
1884              {              {
1885              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1886                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1887              yield = 1;              yield = 1;
1888              goto EXIT;              goto EXIT;
1889              }              }
# Line 1972  while (!done) Line 2013  while (!done)
2013    
2014      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2015        {        {
       int gany_fudge;  
2016        if (timeitm > 0)        if (timeitm > 0)
2017          {          {
2018          register int i;          register int i;
# Line 2212  while (!done) Line 2252  while (!done)
2252          }          }
2253    
2254        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2255        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2256        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2257        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2258        offset values to achieve this. We won't be at the end of the string -  
2259        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any" or
2260          "anycrlf". If the previous match was at the end of a line terminated by
2261          CRLF, an advance of one character just passes the \r, whereas we should
2262          prefer the longer newline sequence, as does the code in pcre_exec().
2263          Fudge the offset value to achieve this.
2264    
2265          Otherwise, in the case of UTF-8 matching, the advance must be one
2266          character, not one byte. */
2267    
2268        else        else
2269          {          {
2270          if (g_notempty != 0)          if (g_notempty != 0)
2271            {            {
2272            int onechar = 1;            int onechar = 1;
2273              unsigned int obits = ((real_pcre *)re)->options;
2274            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2275            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2276                {
2277                int d;
2278                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2279                obits = (d == '\r')? PCRE_NEWLINE_CR :
2280                        (d == '\n')? PCRE_NEWLINE_LF :
2281                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2282                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2283                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2284                }
2285              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2286                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2287                  &&
2288                  start_offset < len - 1 &&
2289                  bptr[start_offset] == '\r' &&
2290                  bptr[start_offset+1] == '\n')
2291                onechar++;
2292              else if (use_utf8)
2293              {              {
2294              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2295                {                {
# Line 2256  while (!done) Line 2321  while (!done)
2321        what Perl's /g options does. This turns out to be rather cunning. First        what Perl's /g options does. This turns out to be rather cunning. First
2322        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2323        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
2324        character.        character. */
   
       Yet more complication arises in the case when the newline option is  
       "any" and a pattern in multiline mode has to match at the start of a  
       line. If a previous match was at the end of a line, and advance of one  
       character just passes the \r, whereas we should prefer the longer newline  
       sequence, as does the code in pcre_exec(). So we fudge it. */  
2325    
2326        g_notempty = 0;        g_notempty = 0;
       gany_fudge = 0;  
2327    
2328        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2329          {          {
2330          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
2331          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
         if ((((real_pcre *)re)->options & PCRE_STARTLINE) != 0 &&  
             (((real_pcre *)re)->options & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY &&  
             use_offsets[0] < len - 1 &&  
             bptr[use_offsets[0]] == '\r' &&  
             bptr[use_offsets[0]+1] == '\n')  
           gany_fudge = 1;  
2332          }          }
2333    
2334        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
2335    
2336        if (do_g) start_offset = use_offsets[1] + gany_fudge;        if (do_g) start_offset = use_offsets[1];
2337    
2338        /* For /G, update the pointer and length */        /* For /G, update the pointer and length */
2339    
2340        else        else
2341          {          {
2342          bptr += use_offsets[1] + gany_fudge;          bptr += use_offsets[1];
2343          len -= use_offsets[1] + gany_fudge;          len -= use_offsets[1];
2344          }          }
2345        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2346    

Legend:
Removed from v.142  
changed lines
  Added in v.227

  ViewVC Help
Powered by ViewVC 1.1.5