/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC revision 227 by ph10, Tue Aug 21 15:00:15 2007 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include <config.h>
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 48  POSSIBILITY OF SUCH DAMAGE.
48  #include <locale.h>  #include <locale.h>
49  #include <errno.h>  #include <errno.h>
50    
51  #ifndef _WIN32  
52  #include <sys/resource.h>  /* A number of things vary for Windows builds. Originally, pcretest opened its
53    input and output without "b"; then I was told that "b" was needed in some
54    environments, so it was added for release 5.0 to both the input and output. (It
55    makes no difference on Unix-like systems.) Later I was told that it is wrong
56    for the input on Windows. I've now abstracted the modes into two macros that
57    are set here, to make it easier to fiddle with them, and removed "b" from the
58    input mode under Windows. */
59    
60    #if defined(_WIN32) || defined(WIN32)
61    #include <io.h>                /* For _setmode() */
62    #include <fcntl.h>             /* For _O_BINARY */
63    #define INPUT_MODE   "r"
64    #define OUTPUT_MODE  "wb"
65    
66    #else
67    #include <sys/time.h>          /* These two includes are needed */
68    #include <sys/resource.h>      /* for setrlimit(). */
69    #define INPUT_MODE   "rb"
70    #define OUTPUT_MODE  "wb"
71  #endif  #endif
72    
 #define PCRE_SPY        /* For Win32 build, import data, not export */  
73    
74  /* We include pcre_internal.h because we need the internal info for displaying  /* We have to include pcre_internal.h because we need the internal info for
75  the results of pcre_study() and we also need to know about the internal  displaying the results of pcre_study() and we also need to know about the
76  macros, structures, and other internal data values; pcretest has "inside  internal macros, structures, and other internal data values; pcretest has
77  information" compared to a program that strictly follows the PCRE API. */  "inside information" compared to a program that strictly follows the PCRE API.
78    
79    Although pcre_internal.h does itself include pcre.h, we explicitly include it
80    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81    appropriately for an application, not for building PCRE. */
82    
83    #include "pcre.h"
84  #include "pcre_internal.h"  #include "pcre_internal.h"
85    
86  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to the data tables that PCRE uses. So as not to have to keep
# Line 74  symbols to prevent clashes. */ Line 100  symbols to prevent clashes. */
100    
101  /* We also need the pcre_printint() function for printing out compiled  /* We also need the pcre_printint() function for printing out compiled
102  patterns. This function is in a separate file so that it can be included in  patterns. This function is in a separate file so that it can be included in
103  pcre_compile.c when that module is compiled with debugging enabled. */  pcre_compile.c when that module is compiled with debugging enabled.
104    
105    The definition of the macro PRINTABLE, which determines whether to print an
106    output character as-is or as a hex value when showing compiled patterns, is
107    contained in this file. We use it here also, in cases when the locale has not
108    been explicitly changed, so as to get consistent output from systems that
109    differ in their output from isprint() even in the "C" locale. */
110    
111  #include "pcre_printint.src"  #include "pcre_printint.src"
112    
113    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
114    
115    
116  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
117  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 87  Makefile. */ Line 121  Makefile. */
121  #include "pcreposix.h"  #include "pcreposix.h"
122  #endif  #endif
123    
124  /* It is also possible, for the benefit of the version imported into Exim, to  /* It is also possible, for the benefit of the version currently imported into
125  build pcretest without support for UTF8 (define NOUTF8), without the interface  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
126  to the DFA matcher (NODFA), and without the doublecheck of the old "info"  interface to the DFA matcher (NODFA), and without the doublecheck of the old
127  function (define NOINFOCHECK). */  "info" function (define NOINFOCHECK). In fact, we automatically cut out the
128    UTF8 support if PCRE is built without it. */
129    
130    #ifndef SUPPORT_UTF8
131    #ifndef NOUTF8
132    #define NOUTF8
133    #endif
134    #endif
135    
136    
137  /* Other parameters */  /* Other parameters */
# Line 103  function (define NOINFOCHECK). */ Line 144  function (define NOINFOCHECK). */
144  #endif  #endif
145  #endif  #endif
146    
147    /* This is the default loop count for timing. */
148    
149  #define LOOPREPEAT 500000  #define LOOPREPEAT 500000
150    
151  /* Static variables */  /* Static variables */
# Line 113  static int callout_count; Line 156  static int callout_count;
156  static int callout_extra;  static int callout_extra;
157  static int callout_fail_count;  static int callout_fail_count;
158  static int callout_fail_id;  static int callout_fail_id;
159    static int debug_lengths;
160  static int first_callout;  static int first_callout;
161    static int locale_set = 0;
162  static int show_malloc;  static int show_malloc;
163  static int use_utf8;  static int use_utf8;
164  static size_t gotten_store;  static size_t gotten_store;
# Line 157  uschar *here = start; Line 202  uschar *here = start;
202  for (;;)  for (;;)
203    {    {
204    int rlen = buffer_size - (here - buffer);    int rlen = buffer_size - (here - buffer);
205    
206    if (rlen > 1000)    if (rlen > 1000)
207      {      {
208      int dlen;      int dlen;
# Line 213  return NULL;  /* Control never gets here Line 259  return NULL;  /* Control never gets here
259    
260  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
261  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
262  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
263    
264  Arguments:  Arguments:
265    str           string to be converted    str           string to be converted
# Line 311  Arguments: Line 357  Arguments:
357  Returns:     number of characters placed in the buffer  Returns:     number of characters placed in the buffer
358  */  */
359    
360    #if !defined NOUTF8
361    
362  static int  static int
363  ord2utf8(int cvalue, uschar *utf8bytes)  ord2utf8(int cvalue, uschar *utf8bytes)
364  {  {
# Line 327  for (j = i; j > 0; j--) Line 375  for (j = i; j > 0; j--)
375  return i + 1;  return i + 1;
376  }  }
377    
378    #endif
379    
380    
381    
382  /*************************************************  /*************************************************
# Line 353  while (length-- > 0) Line 403  while (length-- > 0)
403        {        {
404        length -= rc - 1;        length -= rc - 1;
405        p += rc;        p += rc;
406        if (c < 256 && isprint(c))        if (PRINTHEX(c))
407          {          {
408          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
409          yield++;          yield++;
410          }          }
411        else        else
412          {          {
413          int n;          int n = 4;
414          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
415          yield += n;          yield += (n <= 0x000000ff)? 2 :
416                     (n <= 0x00000fff)? 3 :
417                     (n <= 0x0000ffff)? 4 :
418                     (n <= 0x000fffff)? 5 : 6;
419          }          }
420        continue;        continue;
421        }        }
# Line 371  while (length-- > 0) Line 424  while (length-- > 0)
424    
425     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
426    
427    if (isprint(c = *(p++)))    c = *p++;
428      if (PRINTHEX(c))
429      {      {
430      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
431      yield++;      yield++;
# Line 610  return count; Line 664  return count;
664    
665    
666  /*************************************************  /*************************************************
667    *         Case-independent strncmp() function    *
668    *************************************************/
669    
670    /*
671    Arguments:
672      s         first string
673      t         second string
674      n         number of characters to compare
675    
676    Returns:    < 0, = 0, or > 0, according to the comparison
677    */
678    
679    static int
680    strncmpic(uschar *s, uschar *t, int n)
681    {
682    while (n--)
683      {
684      int c = tolower(*s++) - tolower(*t++);
685      if (c) return c;
686      }
687    return 0;
688    }
689    
690    
691    
692    /*************************************************
693  *         Check newline indicator                *  *         Check newline indicator                *
694  *************************************************/  *************************************************/
695    
696  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes, where
697  xxx is LF, CR, or CRLF. Print a message and return 0 if there is no match.  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
698    no match.
699    
700  Arguments:  Arguments:
701    p           points after the leading '<'    p           points after the leading '<'
# Line 626  Returns:      appropriate PCRE_NEWLINE_x Line 707  Returns:      appropriate PCRE_NEWLINE_x
707  static int  static int
708  check_newline(uschar *p, FILE *f)  check_newline(uschar *p, FILE *f)
709  {  {
710  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
711  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
712  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
713    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
714    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
715  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
716  return 0;  return 0;
717  }  }
# Line 636  return 0; Line 719  return 0;
719    
720    
721  /*************************************************  /*************************************************
722    *             Usage function                     *
723    *************************************************/
724    
725    static void
726    usage(void)
727    {
728    printf("Usage:     pcretest [options] [<input> [<output>]]\n");
729    printf("  -b       show compiled code (bytecode)\n");
730    printf("  -C       show PCRE compile-time options and exit\n");
731    printf("  -d       debug: show compiled code and information (-b and -i)\n");
732    #if !defined NODFA
733    printf("  -dfa     force DFA matching for all subjects\n");
734    #endif
735    printf("  -help    show usage information\n");
736    printf("  -i       show information about compiled patterns\n"
737           "  -m       output memory used information\n"
738           "  -o <n>   set size of offsets vector to <n>\n");
739    #if !defined NOPOSIX
740    printf("  -p       use POSIX interface\n");
741    #endif
742    printf("  -q       quiet: do not output PCRE version number at start\n");
743    printf("  -S <n>   set stack size to <n> megabytes\n");
744    printf("  -s       output store (memory) used information\n"
745           "  -t       time compilation and execution\n");
746    printf("  -t <n>   time compilation and execution, repeating <n> times\n");
747    printf("  -tm      time execution (matching) only\n");
748    printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
749    }
750    
751    
752    
753    /*************************************************
754  *                Main Program                    *  *                Main Program                    *
755  *************************************************/  *************************************************/
756    
# Line 650  int options = 0; Line 765  int options = 0;
765  int study_options = 0;  int study_options = 0;
766  int op = 1;  int op = 1;
767  int timeit = 0;  int timeit = 0;
768    int timeitm = 0;
769  int showinfo = 0;  int showinfo = 0;
770  int showstore = 0;  int showstore = 0;
771  int quiet = 0;  int quiet = 0;
# Line 681  buffer = (unsigned char *)malloc(buffer_ Line 797  buffer = (unsigned char *)malloc(buffer_
797  dbuffer = (unsigned char *)malloc(buffer_size);  dbuffer = (unsigned char *)malloc(buffer_size);
798  pbuffer = (unsigned char *)malloc(buffer_size);  pbuffer = (unsigned char *)malloc(buffer_size);
799    
800  /* The outfile variable is static so that new_malloc can use it. The _setmode()  /* The outfile variable is static so that new_malloc can use it. */
 stuff is some magic that I don't understand, but which apparently does good  
 things in Windows. It's related to line terminations.  */  
   
 #if defined(_WIN32) || defined(WIN32)  
 _setmode( _fileno( stdout ), 0x8000 );  
 #endif  /* defined(_WIN32) || defined(WIN32) */  
801    
802  outfile = stdout;  outfile = stdout;
803    
804    /* The following  _setmode() stuff is some Windows magic that tells its runtime
805    library to translate CRLF into a single LF character. At least, that's what
806    I've been told: never having used Windows I take this all on trust. Originally
807    it set 0x8000, but then I was advised that _O_BINARY was better. */
808    
809    #if defined(_WIN32) || defined(WIN32)
810    _setmode( _fileno( stdout ), _O_BINARY );
811    #endif
812    
813  /* Scan options */  /* Scan options */
814    
815  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
# Line 699  while (argc > 1 && argv[op][0] == '-') Line 818  while (argc > 1 && argv[op][0] == '-')
818    
819    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
820      showstore = 1;      showstore = 1;
   else if (strcmp(argv[op], "-t") == 0) timeit = 1;  
821    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
822      else if (strcmp(argv[op], "-b") == 0) debug = 1;
823    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
824    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
825  #if !defined NODFA  #if !defined NODFA
# Line 713  while (argc > 1 && argv[op][0] == '-') Line 832  while (argc > 1 && argv[op][0] == '-')
832      op++;      op++;
833      argc--;      argc--;
834      }      }
835      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
836        {
837        int both = argv[op][2] == 0;
838        int temp;
839        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
840                         *endptr == 0))
841          {
842          timeitm = temp;
843          op++;
844          argc--;
845          }
846        else timeitm = LOOPREPEAT;
847        if (both) timeit = timeitm;
848        }
849    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
850        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
851          *endptr == 0))          *endptr == 0))
852      {      {
853  #ifdef _WIN32  #if defined(_WIN32) || defined(WIN32)
854      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
855      exit(1);      exit(1);
856  #else  #else
# Line 749  while (argc > 1 && argv[op][0] == '-') Line 882  while (argc > 1 && argv[op][0] == '-')
882      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
883      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
884      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
885        (rc == '\n')? "LF" : "CRLF");        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
886          (rc == -2)? "ANYCRLF" :
887          (rc == -1)? "ANY" : "???");
888      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
889      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
890      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
# Line 760  while (argc > 1 && argv[op][0] == '-') Line 895  while (argc > 1 && argv[op][0] == '-')
895      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %d\n", rc);
896      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
897      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
898      exit(0);      goto EXIT;
899        }
900      else if (strcmp(argv[op], "-help") == 0 ||
901               strcmp(argv[op], "--help") == 0)
902        {
903        usage();
904        goto EXIT;
905      }      }
906    else    else
907      {      {
908      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
909      printf("Usage:   pcretest [options] [<input> [<output>]]\n");      usage();
     printf("  -C     show PCRE compile-time options and exit\n");  
     printf("  -d     debug: show compiled code; implies -i\n");  
 #if !defined NODFA  
     printf("  -dfa   force DFA matching for all subjects\n");  
 #endif  
     printf("  -i     show information about compiled pattern\n"  
            "  -m     output memory used information\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -S <n> set stack size to <n> megabytes\n");  
     printf("  -s     output store (memory) used information\n"  
            "  -t     time compilation and execution\n");  
910      yield = 1;      yield = 1;
911      goto EXIT;      goto EXIT;
912      }      }
# Line 794  offsets = (int *)malloc(size_offsets_max Line 921  offsets = (int *)malloc(size_offsets_max
921  if (offsets == NULL)  if (offsets == NULL)
922    {    {
923    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
924      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
925    yield = 1;    yield = 1;
926    goto EXIT;    goto EXIT;
927    }    }
# Line 803  if (offsets == NULL) Line 930  if (offsets == NULL)
930    
931  if (argc > 1)  if (argc > 1)
932    {    {
933    infile = fopen(argv[op], "rb");    infile = fopen(argv[op], INPUT_MODE);
934    if (infile == NULL)    if (infile == NULL)
935      {      {
936      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
# Line 814  if (argc > 1) Line 941  if (argc > 1)
941    
942  if (argc > 2)  if (argc > 2)
943    {    {
944    outfile = fopen(argv[op+1], "wb");    outfile = fopen(argv[op+1], OUTPUT_MODE);
945    if (outfile == NULL)    if (outfile == NULL)
946      {      {
947      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
# Line 859  while (!done) Line 986  while (!done)
986    int do_showinfo = showinfo;    int do_showinfo = showinfo;
987    int do_showrest = 0;    int do_showrest = 0;
988    int do_flip = 0;    int do_flip = 0;
989    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
990    
991    use_utf8 = 0;    use_utf8 = 0;
992      debug_lengths = 1;
993    
994    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
995    if (extend_inputline(infile, buffer) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
# Line 969  while (!done) Line 1097  while (!done)
1097      }      }
1098    
1099    pp = p;    pp = p;
1100      poffset = p - buffer;
1101    
1102    for(;;)    for(;;)
1103      {      {
# Line 989  while (!done) Line 1118  while (!done)
1118      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1119      }      }
1120    
1121      /* The buffer may have moved while being extended; reset the start of data
1122      pointer to the correct relative point in the buffer. */
1123    
1124      p = buffer + poffset;
1125    
1126    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1127    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1128    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
# Line 1020  while (!done) Line 1154  while (!done)
1154    
1155        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1156        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1157          case 'B': do_debug = 1; break;
1158        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
1159        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1160        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
# Line 1037  while (!done) Line 1172  while (!done)
1172        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1173        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1174        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1175          case 'Z': debug_lengths = 0; break;
1176        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1177        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1178    
1179        case 'L':        case 'L':
1180        ppp = pp;        ppp = pp;
1181        /* The '\r' test here is so that it works on Windows */        /* The '\r' test here is so that it works on Windows. */
1182        while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;        /* The '0' test is just in case this is an unterminated line. */
1183          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1184        *ppp = 0;        *ppp = 0;
1185        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1186          {          {
1187          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1188          goto SKIP_DATA;          goto SKIP_DATA;
1189          }          }
1190          locale_set = 1;
1191        tables = pcre_maketables();        tables = pcre_maketables();
1192        pp = ppp;        pp = ppp;
1193        break;        break;
# Line 1116  while (!done) Line 1254  while (!done)
1254  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1255    
1256      {      {
1257      if (timeit)      if (timeit > 0)
1258        {        {
1259        register int i;        register int i;
1260        clock_t time_taken;        clock_t time_taken;
1261        clock_t start_time = clock();        clock_t start_time = clock();
1262        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1263          {          {
1264          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1265          if (re != NULL) free(re);          if (re != NULL) free(re);
1266          }          }
1267        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1268        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1269          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1270            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1271        }        }
1272    
# Line 1180  while (!done) Line 1318  while (!done)
1318    
1319      if (do_study)      if (do_study)
1320        {        {
1321        if (timeit)        if (timeit > 0)
1322          {          {
1323          register int i;          register int i;
1324          clock_t time_taken;          clock_t time_taken;
1325          clock_t start_time = clock();          clock_t start_time = clock();
1326          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
1327            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
1328          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1329          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
1330          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
1331            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeit) /
1332              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
1333          }          }
1334        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 1233  while (!done) Line 1371  while (!done)
1371    
1372      SHOW_INFO:      SHOW_INFO:
1373    
1374        if (do_debug)
1375          {
1376          fprintf(outfile, "------------------------------------------------------------------\n");
1377          pcre_printint(re, outfile, debug_lengths);
1378          }
1379    
1380      if (do_showinfo)      if (do_showinfo)
1381        {        {
1382        unsigned long int get_options, all_options;        unsigned long int get_options, all_options;
1383  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1384        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1385  #endif  #endif
1386        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char, okpartial, jchanged,
1387            hascrorlf;
1388        int nameentrysize, namecount;        int nameentrysize, namecount;
1389        const uschar *nametable;        const uschar *nametable;
1390    
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         pcre_printint(re, outfile);  
         }  
   
1391        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1392        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1393        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
# Line 1258  while (!done) Line 1397  while (!done)
1397        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1398        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1399        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1400          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1401          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1402          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1403    
1404  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1405        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
# Line 1299  while (!done) Line 1441  while (!done)
1441            }            }
1442          }          }
1443    
1444        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1445        to fish it out via our back door */        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1446    
1447        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1448        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
          }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
1449    
1450        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1451          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
# Line 1327  while (!done) Line 1463  while (!done)
1463            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1464            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1465    
1466        switch (get_options & PCRE_NEWLINE_CRLF)        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1467    
1468          switch (get_options & PCRE_NEWLINE_BITS)
1469          {          {
1470          case PCRE_NEWLINE_CR:          case PCRE_NEWLINE_CR:
1471          fprintf(outfile, "Forced newline sequence: CR\n");          fprintf(outfile, "Forced newline sequence: CR\n");
# Line 1341  while (!done) Line 1479  while (!done)
1479          fprintf(outfile, "Forced newline sequence: CRLF\n");          fprintf(outfile, "Forced newline sequence: CRLF\n");
1480          break;          break;
1481    
1482            case PCRE_NEWLINE_ANYCRLF:
1483            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1484            break;
1485    
1486            case PCRE_NEWLINE_ANY:
1487            fprintf(outfile, "Forced newline sequence: ANY\n");
1488            break;
1489    
1490          default:          default:
1491          break;          break;
1492          }          }
# Line 1358  while (!done) Line 1504  while (!done)
1504          int ch = first_char & 255;          int ch = first_char & 255;
1505          const char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1506            "" : " (caseless)";            "" : " (caseless)";
1507          if (isprint(ch))          if (PRINTHEX(ch))
1508            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1509          else          else
1510            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 1373  while (!done) Line 1519  while (!done)
1519          int ch = need_char & 255;          int ch = need_char & 255;
1520          const char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1521            "" : " (caseless)";            "" : " (caseless)";
1522          if (isprint(ch))          if (PRINTHEX(ch))
1523            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1524          else          else
1525            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
# Line 1409  while (!done) Line 1555  while (!done)
1555                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
1556                    c = 2;                    c = 2;
1557                    }                    }
1558                  if (isprint(i) && i != ' ')                  if (PRINTHEX(i) && i != ' ')
1559                    {                    {
1560                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
1561                    c += 2;                    c += 2;
# Line 1468  while (!done) Line 1614  while (!done)
1614                  strerror(errno));                  strerror(errno));
1615                }                }
1616              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
1617    
1618              }              }
1619            }            }
1620          fclose(f);          fclose(f);
# Line 1485  while (!done) Line 1632  while (!done)
1632    for (;;)    for (;;)
1633      {      {
1634      uschar *q;      uschar *q;
1635      uschar *bptr = dbuffer;      uschar *bptr;
1636      int *use_offsets = offsets;      int *use_offsets = offsets;
1637      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1638      int callout_data = 0;      int callout_data = 0;
# Line 1541  while (!done) Line 1688  while (!done)
1688      p = buffer;      p = buffer;
1689      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1690    
1691      q = dbuffer;      bptr = q = dbuffer;
1692      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1693        {        {
1694        int i = 0;        int i = 0;
# Line 1736  while (!done) Line 1883  while (!done)
1883            if (offsets == NULL)            if (offsets == NULL)
1884              {              {
1885              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1886                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1887              yield = 1;              yield = 1;
1888              goto EXIT;              goto EXIT;
1889              }              }
# Line 1866  while (!done) Line 2013  while (!done)
2013    
2014      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2015        {        {
2016        if (timeit)        if (timeitm > 0)
2017          {          {
2018          register int i;          register int i;
2019          clock_t time_taken;          clock_t time_taken;
# Line 1876  while (!done) Line 2023  while (!done)
2023          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
2024            {            {
2025            int workspace[1000];            int workspace[1000];
2026            for (i = 0; i < LOOPREPEAT; i++)            for (i = 0; i < timeitm; i++)
2027              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2028                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
2029                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
# Line 1884  while (!done) Line 2031  while (!done)
2031          else          else
2032  #endif  #endif
2033    
2034          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeitm; i++)
2035            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2036              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2037    
2038          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2039          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2040            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
2041              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2042          }          }
2043    
# Line 1966  while (!done) Line 2113  while (!done)
2113    
2114        if (count >= 0)        if (count >= 0)
2115          {          {
2116          int i;          int i, maxcount;
2117    
2118    #if !defined NODFA
2119            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2120    #endif
2121              maxcount = use_size_offsets/3;
2122    
2123            /* This is a check against a lunatic return value. */
2124    
2125            if (count > maxcount)
2126              {
2127              fprintf(outfile,
2128                "** PCRE error: returned count %d is too big for offset size %d\n",
2129                count, use_size_offsets);
2130              count = use_size_offsets/3;
2131              if (do_g || do_G)
2132                {
2133                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2134                do_g = do_G = FALSE;        /* Break g/G loop */
2135                }
2136              }
2137    
2138          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2139            {            {
2140            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 2084  while (!done) Line 2252  while (!done)
2252          }          }
2253    
2254        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2255        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2256        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2257        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2258        offset values to achieve this. We won't be at the end of the string -  
2259        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any" or
2260          "anycrlf". If the previous match was at the end of a line terminated by
2261          CRLF, an advance of one character just passes the \r, whereas we should
2262          prefer the longer newline sequence, as does the code in pcre_exec().
2263          Fudge the offset value to achieve this.
2264    
2265          Otherwise, in the case of UTF-8 matching, the advance must be one
2266          character, not one byte. */
2267    
2268        else        else
2269          {          {
2270          if (g_notempty != 0)          if (g_notempty != 0)
2271            {            {
2272            int onechar = 1;            int onechar = 1;
2273              unsigned int obits = ((real_pcre *)re)->options;
2274            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2275            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2276                {
2277                int d;
2278                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2279                obits = (d == '\r')? PCRE_NEWLINE_CR :
2280                        (d == '\n')? PCRE_NEWLINE_LF :
2281                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2282                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2283                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2284                }
2285              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2286                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2287                  &&
2288                  start_offset < len - 1 &&
2289                  bptr[start_offset] == '\r' &&
2290                  bptr[start_offset+1] == '\n')
2291                onechar++;
2292              else if (use_utf8)
2293              {              {
2294              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2295                {                {
# Line 2131  while (!done) Line 2324  while (!done)
2324        character. */        character. */
2325    
2326        g_notempty = 0;        g_notempty = 0;
2327    
2328        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2329          {          {
2330          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
# Line 2165  while (!done) Line 2359  while (!done)
2359      {      {
2360      new_free((void *)tables);      new_free((void *)tables);
2361      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2362        locale_set = 0;
2363      }      }
2364    }    }
2365    

Legend:
Removed from v.91  
changed lines
  Added in v.227

  ViewVC Help
Powered by ViewVC 1.1.5