/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 144 by ph10, Mon Apr 2 13:32:07 2007 UTC | revision 227 by ph10, Tue Aug 21 15:00:15 2007 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include <config.h>
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 67  input mode under Windows. */ Line 71  input mode under Windows. */
71  #endif  #endif
72    
73    
74  #define PCRE_SPY        /* For Win32 build, import data, not export */  /* We have to include pcre_internal.h because we need the internal info for
75    displaying the results of pcre_study() and we also need to know about the
76  /* We include pcre_internal.h because we need the internal info for displaying  internal macros, structures, and other internal data values; pcretest has
77  the results of pcre_study() and we also need to know about the internal  "inside information" compared to a program that strictly follows the PCRE API.
78  macros, structures, and other internal data values; pcretest has "inside  
79  information" compared to a program that strictly follows the PCRE API. */  Although pcre_internal.h does itself include pcre.h, we explicitly include it
80    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81    appropriately for an application, not for building PCRE. */
82    
83    #include "pcre.h"
84  #include "pcre_internal.h"  #include "pcre_internal.h"
85    
86  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to the data tables that PCRE uses. So as not to have to keep
# Line 149  static int callout_count; Line 156  static int callout_count;
156  static int callout_extra;  static int callout_extra;
157  static int callout_fail_count;  static int callout_fail_count;
158  static int callout_fail_id;  static int callout_fail_id;
159    static int debug_lengths;
160  static int first_callout;  static int first_callout;
161  static int locale_set = 0;  static int locale_set = 0;
162  static int show_malloc;  static int show_malloc;
# Line 656  return count; Line 664  return count;
664    
665    
666  /*************************************************  /*************************************************
667    *         Case-independent strncmp() function    *
668    *************************************************/
669    
670    /*
671    Arguments:
672      s         first string
673      t         second string
674      n         number of characters to compare
675    
676    Returns:    < 0, = 0, or > 0, according to the comparison
677    */
678    
679    static int
680    strncmpic(uschar *s, uschar *t, int n)
681    {
682    while (n--)
683      {
684      int c = tolower(*s++) - tolower(*t++);
685      if (c) return c;
686      }
687    return 0;
688    }
689    
690    
691    
692    /*************************************************
693  *         Check newline indicator                *  *         Check newline indicator                *
694  *************************************************/  *************************************************/
695    
696  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes, where
697  xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
698    no match.
699    
700  Arguments:  Arguments:
701    p           points after the leading '<'    p           points after the leading '<'
# Line 672  Returns:      appropriate PCRE_NEWLINE_x Line 707  Returns:      appropriate PCRE_NEWLINE_x
707  static int  static int
708  check_newline(uschar *p, FILE *f)  check_newline(uschar *p, FILE *f)
709  {  {
710  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
711  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
712  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
713  if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
714    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
715  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
716  return 0;  return 0;
717  }  }
# Line 847  while (argc > 1 && argv[op][0] == '-') Line 883  while (argc > 1 && argv[op][0] == '-')
883      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
884      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
885        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
886          (rc == -2)? "ANYCRLF" :
887        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
888      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
889      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
# Line 884  offsets = (int *)malloc(size_offsets_max Line 921  offsets = (int *)malloc(size_offsets_max
921  if (offsets == NULL)  if (offsets == NULL)
922    {    {
923    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
924      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
925    yield = 1;    yield = 1;
926    goto EXIT;    goto EXIT;
927    }    }
# Line 944  while (!done) Line 981  while (!done)
981    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
982    int do_study = 0;    int do_study = 0;
983    int do_debug = debug;    int do_debug = debug;
   int debug_lengths = 1;  
984    int do_G = 0;    int do_G = 0;
985    int do_g = 0;    int do_g = 0;
986    int do_showinfo = showinfo;    int do_showinfo = showinfo;
# Line 953  while (!done) Line 989  while (!done)
989    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
990    
991    use_utf8 = 0;    use_utf8 = 0;
992      debug_lengths = 1;
993    
994    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
995    if (extend_inputline(infile, buffer) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
# Line 1346  while (!done) Line 1383  while (!done)
1383  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1384        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1385  #endif  #endif
1386        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char, okpartial, jchanged,
1387            hascrorlf;
1388        int nameentrysize, namecount;        int nameentrysize, namecount;
1389        const uschar *nametable;        const uschar *nametable;
1390    
# Line 1359  while (!done) Line 1397  while (!done)
1397        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1398        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1399        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1400          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1401          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1402          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1403    
1404  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1405        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
# Line 1400  while (!done) Line 1441  while (!done)
1441            }            }
1442          }          }
1443    
1444        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1445        to fish it out via out back door */        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1446    
1447        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1448        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
          }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
1449    
1450        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1451          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
# Line 1428  while (!done) Line 1463  while (!done)
1463            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1464            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1465    
1466          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1467    
1468        switch (get_options & PCRE_NEWLINE_BITS)        switch (get_options & PCRE_NEWLINE_BITS)
1469          {          {
1470          case PCRE_NEWLINE_CR:          case PCRE_NEWLINE_CR:
# Line 1442  while (!done) Line 1479  while (!done)
1479          fprintf(outfile, "Forced newline sequence: CRLF\n");          fprintf(outfile, "Forced newline sequence: CRLF\n");
1480          break;          break;
1481    
1482            case PCRE_NEWLINE_ANYCRLF:
1483            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1484            break;
1485    
1486          case PCRE_NEWLINE_ANY:          case PCRE_NEWLINE_ANY:
1487          fprintf(outfile, "Forced newline sequence: ANY\n");          fprintf(outfile, "Forced newline sequence: ANY\n");
1488          break;          break;
# Line 1591  while (!done) Line 1632  while (!done)
1632    for (;;)    for (;;)
1633      {      {
1634      uschar *q;      uschar *q;
1635      uschar *bptr = dbuffer;      uschar *bptr;
1636      int *use_offsets = offsets;      int *use_offsets = offsets;
1637      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1638      int callout_data = 0;      int callout_data = 0;
# Line 1647  while (!done) Line 1688  while (!done)
1688      p = buffer;      p = buffer;
1689      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1690    
1691      q = dbuffer;      bptr = q = dbuffer;
1692      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1693        {        {
1694        int i = 0;        int i = 0;
# Line 1842  while (!done) Line 1883  while (!done)
1883            if (offsets == NULL)            if (offsets == NULL)
1884              {              {
1885              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1886                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1887              yield = 1;              yield = 1;
1888              goto EXIT;              goto EXIT;
1889              }              }
# Line 2215  while (!done) Line 2256  while (!done)
2256        to advance the start offset, and continue. We won't be at the end of the        to advance the start offset, and continue. We won't be at the end of the
2257        string - that was checked before setting g_notempty.        string - that was checked before setting g_notempty.
2258    
2259        Complication arises in the case when the newline option is "any".        Complication arises in the case when the newline option is "any" or
2260        If the previous match was at the end of a line terminated by CRLF, an        "anycrlf". If the previous match was at the end of a line terminated by
2261        advance of one character just passes the \r, whereas we should prefer the        CRLF, an advance of one character just passes the \r, whereas we should
2262        longer newline sequence, as does the code in pcre_exec(). Fudge the        prefer the longer newline sequence, as does the code in pcre_exec().
2263        offset value to achieve this.        Fudge the offset value to achieve this.
2264    
2265        Otherwise, in the case of UTF-8 matching, the advance must be one        Otherwise, in the case of UTF-8 matching, the advance must be one
2266        character, not one byte. */        character, not one byte. */
# Line 2229  while (!done) Line 2270  while (!done)
2270          if (g_notempty != 0)          if (g_notempty != 0)
2271            {            {
2272            int onechar = 1;            int onechar = 1;
2273              unsigned int obits = ((real_pcre *)re)->options;
2274            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2275            if ((((real_pcre *)re)->options & PCRE_NEWLINE_BITS) ==            if ((obits & PCRE_NEWLINE_BITS) == 0)
2276                    PCRE_NEWLINE_ANY &&              {
2277                int d;
2278                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2279                obits = (d == '\r')? PCRE_NEWLINE_CR :
2280                        (d == '\n')? PCRE_NEWLINE_LF :
2281                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2282                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2283                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2284                }
2285              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2286                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2287                  &&
2288                start_offset < len - 1 &&                start_offset < len - 1 &&
2289                bptr[start_offset] == '\r' &&                bptr[start_offset] == '\r' &&
2290                bptr[start_offset+1] == '\n')                bptr[start_offset+1] == '\n')

Legend:
Removed from v.144  
changed lines
  Added in v.227

  ViewVC Help
Powered by ViewVC 1.1.5