/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC revision 199 by ph10, Tue Jul 31 14:39:09 2007 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include <config.h>
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 48  POSSIBILITY OF SUCH DAMAGE.
48  #include <locale.h>  #include <locale.h>
49  #include <errno.h>  #include <errno.h>
50    
51  #ifndef _WIN32  
52  #include <sys/resource.h>  /* A number of things vary for Windows builds. Originally, pcretest opened its
53    input and output without "b"; then I was told that "b" was needed in some
54    environments, so it was added for release 5.0 to both the input and output. (It
55    makes no difference on Unix-like systems.) Later I was told that it is wrong
56    for the input on Windows. I've now abstracted the modes into two macros that
57    are set here, to make it easier to fiddle with them, and removed "b" from the
58    input mode under Windows. */
59    
60    #if defined(_WIN32) || defined(WIN32)
61    #include <io.h>                /* For _setmode() */
62    #include <fcntl.h>             /* For _O_BINARY */
63    #define INPUT_MODE   "r"
64    #define OUTPUT_MODE  "wb"
65    
66    #else
67    #include <sys/time.h>          /* These two includes are needed */
68    #include <sys/resource.h>      /* for setrlimit(). */
69    #define INPUT_MODE   "rb"
70    #define OUTPUT_MODE  "wb"
71  #endif  #endif
72    
 #define PCRE_SPY        /* For Win32 build, import data, not export */  
73    
74  /* We include pcre_internal.h because we need the internal info for displaying  /* We have to include pcre_internal.h because we need the internal info for
75  the results of pcre_study() and we also need to know about the internal  displaying the results of pcre_study() and we also need to know about the
76  macros, structures, and other internal data values; pcretest has "inside  internal macros, structures, and other internal data values; pcretest has
77  information" compared to a program that strictly follows the PCRE API. */  "inside information" compared to a program that strictly follows the PCRE API.
78    
79    Although pcre_internal.h does itself include pcre.h, we explicitly include it
80    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81    appropriately for an application, not for building PCRE. */
82    
83    #include "pcre.h"
84  #include "pcre_internal.h"  #include "pcre_internal.h"
85    
86  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to the data tables that PCRE uses. So as not to have to keep
# Line 74  symbols to prevent clashes. */ Line 100  symbols to prevent clashes. */
100    
101  /* We also need the pcre_printint() function for printing out compiled  /* We also need the pcre_printint() function for printing out compiled
102  patterns. This function is in a separate file so that it can be included in  patterns. This function is in a separate file so that it can be included in
103  pcre_compile.c when that module is compiled with debugging enabled. */  pcre_compile.c when that module is compiled with debugging enabled.
104    
105    The definition of the macro PRINTABLE, which determines whether to print an
106    output character as-is or as a hex value when showing compiled patterns, is
107    contained in this file. We use it here also, in cases when the locale has not
108    been explicitly changed, so as to get consistent output from systems that
109    differ in their output from isprint() even in the "C" locale. */
110    
111  #include "pcre_printint.src"  #include "pcre_printint.src"
112    
113    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
114    
115    
116  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
117  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 87  Makefile. */ Line 121  Makefile. */
121  #include "pcreposix.h"  #include "pcreposix.h"
122  #endif  #endif
123    
124  /* It is also possible, for the benefit of the version imported into Exim, to  /* It is also possible, for the benefit of the version currently imported into
125  build pcretest without support for UTF8 (define NOUTF8), without the interface  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
126  to the DFA matcher (NODFA), and without the doublecheck of the old "info"  interface to the DFA matcher (NODFA), and without the doublecheck of the old
127  function (define NOINFOCHECK). */  "info" function (define NOINFOCHECK). In fact, we automatically cut out the
128    UTF8 support if PCRE is built without it. */
129    
130    #ifndef SUPPORT_UTF8
131    #ifndef NOUTF8
132    #define NOUTF8
133    #endif
134    #endif
135    
136    
137  /* Other parameters */  /* Other parameters */
# Line 103  function (define NOINFOCHECK). */ Line 144  function (define NOINFOCHECK). */
144  #endif  #endif
145  #endif  #endif
146    
147    /* This is the default loop count for timing. */
148    
149  #define LOOPREPEAT 500000  #define LOOPREPEAT 500000
150    
151  /* Static variables */  /* Static variables */
# Line 114  static int callout_extra; Line 157  static int callout_extra;
157  static int callout_fail_count;  static int callout_fail_count;
158  static int callout_fail_id;  static int callout_fail_id;
159  static int first_callout;  static int first_callout;
160    static int locale_set = 0;
161  static int show_malloc;  static int show_malloc;
162  static int use_utf8;  static int use_utf8;
163  static size_t gotten_store;  static size_t gotten_store;
# Line 157  uschar *here = start; Line 201  uschar *here = start;
201  for (;;)  for (;;)
202    {    {
203    int rlen = buffer_size - (here - buffer);    int rlen = buffer_size - (here - buffer);
204    
205    if (rlen > 1000)    if (rlen > 1000)
206      {      {
207      int dlen;      int dlen;
# Line 213  return NULL;  /* Control never gets here Line 258  return NULL;  /* Control never gets here
258    
259  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
260  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
261  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
262    
263  Arguments:  Arguments:
264    str           string to be converted    str           string to be converted
# Line 311  Arguments: Line 356  Arguments:
356  Returns:     number of characters placed in the buffer  Returns:     number of characters placed in the buffer
357  */  */
358    
359    #if !defined NOUTF8
360    
361  static int  static int
362  ord2utf8(int cvalue, uschar *utf8bytes)  ord2utf8(int cvalue, uschar *utf8bytes)
363  {  {
# Line 327  for (j = i; j > 0; j--) Line 374  for (j = i; j > 0; j--)
374  return i + 1;  return i + 1;
375  }  }
376    
377    #endif
378    
379    
380    
381  /*************************************************  /*************************************************
# Line 353  while (length-- > 0) Line 402  while (length-- > 0)
402        {        {
403        length -= rc - 1;        length -= rc - 1;
404        p += rc;        p += rc;
405        if (c < 256 && isprint(c))        if (PRINTHEX(c))
406          {          {
407          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
408          yield++;          yield++;
409          }          }
410        else        else
411          {          {
412          int n;          int n = 4;
413          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
414          yield += n;          yield += (n <= 0x000000ff)? 2 :
415                     (n <= 0x00000fff)? 3 :
416                     (n <= 0x0000ffff)? 4 :
417                     (n <= 0x000fffff)? 5 : 6;
418          }          }
419        continue;        continue;
420        }        }
# Line 371  while (length-- > 0) Line 423  while (length-- > 0)
423    
424     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
425    
426    if (isprint(c = *(p++)))    c = *p++;
427      if (PRINTHEX(c))
428      {      {
429      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
430      yield++;      yield++;
# Line 614  return count; Line 667  return count;
667  *************************************************/  *************************************************/
668    
669  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes, where
670  xxx is LF, CR, or CRLF. Print a message and return 0 if there is no match.  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
671    no match.
672    
673  Arguments:  Arguments:
674    p           points after the leading '<'    p           points after the leading '<'
# Line 629  check_newline(uschar *p, FILE *f) Line 683  check_newline(uschar *p, FILE *f)
683  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
684  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
685  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
686    if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
687    if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
688  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
689  return 0;  return 0;
690  }  }
# Line 636  return 0; Line 692  return 0;
692    
693    
694  /*************************************************  /*************************************************
695    *             Usage function                     *
696    *************************************************/
697    
/* Print the pcretest command-line synopsis, followed by one line of help per
option, using printf(). The "-dfa" line is compiled out when NODFA is defined,
and the "-p" line is compiled out when NOPOSIX is defined.

Arguments:   none
Returns:     nothing
*/
698    static void
699    usage(void)
700    {
701    printf("Usage:     pcretest [options] [<input> [<output>]]\n");
702    printf("  -b       show compiled code (bytecode)\n");
703    printf("  -C       show PCRE compile-time options and exit\n");
704    printf("  -d       debug: show compiled code and information (-b and -i)\n");
705    #if !defined NODFA
706    printf("  -dfa     force DFA matching for all subjects\n");
707    #endif
708    printf("  -help    show usage information\n");
709    printf("  -i       show information about compiled patterns\n"
710           "  -m       output memory used information\n"
711           "  -o <n>   set size of offsets vector to <n>\n");
712    #if !defined NOPOSIX
713    printf("  -p       use POSIX interface\n");
714    #endif
715    printf("  -q       quiet: do not output PCRE version number at start\n");
716    printf("  -S <n>   set stack size to <n> megabytes\n");
717    printf("  -s       output store (memory) used information\n"
718           "  -t       time compilation and execution\n");
719    printf("  -t <n>   time compilation and execution, repeating <n> times\n");
720    printf("  -tm      time execution (matching) only\n");
721    printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
722    }
723    
724    
725    
726    /*************************************************
727  *                Main Program                    *  *                Main Program                    *
728  *************************************************/  *************************************************/
729    
# Line 650  int options = 0; Line 738  int options = 0;
738  int study_options = 0;  int study_options = 0;
739  int op = 1;  int op = 1;
740  int timeit = 0;  int timeit = 0;
741    int timeitm = 0;
742  int showinfo = 0;  int showinfo = 0;
743  int showstore = 0;  int showstore = 0;
744  int quiet = 0;  int quiet = 0;
# Line 681  buffer = (unsigned char *)malloc(buffer_ Line 770  buffer = (unsigned char *)malloc(buffer_
770  dbuffer = (unsigned char *)malloc(buffer_size);  dbuffer = (unsigned char *)malloc(buffer_size);
771  pbuffer = (unsigned char *)malloc(buffer_size);  pbuffer = (unsigned char *)malloc(buffer_size);
772    
773  /* The outfile variable is static so that new_malloc can use it. The _setmode()  /* The outfile variable is static so that new_malloc can use it. */
 stuff is some magic that I don't understand, but which apparently does good  
 things in Windows. It's related to line terminations.  */  
   
 #if defined(_WIN32) || defined(WIN32)  
 _setmode( _fileno( stdout ), 0x8000 );  
 #endif  /* defined(_WIN32) || defined(WIN32) */  
774    
775  outfile = stdout;  outfile = stdout;
776    
777    /* The following _setmode() stuff is some Windows magic that tells its runtime
778    library to translate CRLF into a single LF character. At least, that's what
779    I've been told: never having used Windows I take this all on trust. Originally
780    it set 0x8000, but then I was advised that _O_BINARY was better. */
781    
782    #if defined(_WIN32) || defined(WIN32)
783    _setmode( _fileno( stdout ), _O_BINARY );
784    #endif
785    
786  /* Scan options */  /* Scan options */
787    
788  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
# Line 699  while (argc > 1 && argv[op][0] == '-') Line 791  while (argc > 1 && argv[op][0] == '-')
791    
792    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
793      showstore = 1;      showstore = 1;
   else if (strcmp(argv[op], "-t") == 0) timeit = 1;  
794    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
795      else if (strcmp(argv[op], "-b") == 0) debug = 1;
796    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
797    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
798  #if !defined NODFA  #if !defined NODFA
# Line 713  while (argc > 1 && argv[op][0] == '-') Line 805  while (argc > 1 && argv[op][0] == '-')
805      op++;      op++;
806      argc--;      argc--;
807      }      }
808      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
809        {
810        int both = argv[op][2] == 0;
811        int temp;
812        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
813                         *endptr == 0))
814          {
815          timeitm = temp;
816          op++;
817          argc--;
818          }
819        else timeitm = LOOPREPEAT;
820        if (both) timeit = timeitm;
821        }
822    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
823        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
824          *endptr == 0))          *endptr == 0))
825      {      {
826  #ifdef _WIN32  #if defined(_WIN32) || defined(WIN32)
827      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
828      exit(1);      exit(1);
829  #else  #else
# Line 749  while (argc > 1 && argv[op][0] == '-') Line 855  while (argc > 1 && argv[op][0] == '-')
855      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
856      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
857      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
858        (rc == '\n')? "LF" : "CRLF");        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
859          (rc == -2)? "ANYCRLF" :
860          (rc == -1)? "ANY" : "???");
861      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
862      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
863      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
# Line 760  while (argc > 1 && argv[op][0] == '-') Line 868  while (argc > 1 && argv[op][0] == '-')
868      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %d\n", rc);
869      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
870      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
871      exit(0);      goto EXIT;
872        }
873      else if (strcmp(argv[op], "-help") == 0 ||
874               strcmp(argv[op], "--help") == 0)
875        {
876        usage();
877        goto EXIT;
878      }      }
879    else    else
880      {      {
881      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
882      printf("Usage:   pcretest [options] [<input> [<output>]]\n");      usage();
     printf("  -C     show PCRE compile-time options and exit\n");  
     printf("  -d     debug: show compiled code; implies -i\n");  
 #if !defined NODFA  
     printf("  -dfa   force DFA matching for all subjects\n");  
 #endif  
     printf("  -i     show information about compiled pattern\n"  
            "  -m     output memory used information\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -S <n> set stack size to <n> megabytes\n");  
     printf("  -s     output store (memory) used information\n"  
            "  -t     time compilation and execution\n");  
883      yield = 1;      yield = 1;
884      goto EXIT;      goto EXIT;
885      }      }
# Line 794  offsets = (int *)malloc(size_offsets_max Line 894  offsets = (int *)malloc(size_offsets_max
894  if (offsets == NULL)  if (offsets == NULL)
895    {    {
896    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
897      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
898    yield = 1;    yield = 1;
899    goto EXIT;    goto EXIT;
900    }    }
# Line 803  if (offsets == NULL) Line 903  if (offsets == NULL)
903    
904  if (argc > 1)  if (argc > 1)
905    {    {
906    infile = fopen(argv[op], "rb");    infile = fopen(argv[op], INPUT_MODE);
907    if (infile == NULL)    if (infile == NULL)
908      {      {
909      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
# Line 814  if (argc > 1) Line 914  if (argc > 1)
914    
915  if (argc > 2)  if (argc > 2)
916    {    {
917    outfile = fopen(argv[op+1], "wb");    outfile = fopen(argv[op+1], OUTPUT_MODE);
918    if (outfile == NULL)    if (outfile == NULL)
919      {      {
920      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
# Line 854  while (!done) Line 954  while (!done)
954    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
955    int do_study = 0;    int do_study = 0;
956    int do_debug = debug;    int do_debug = debug;
957      int debug_lengths = 1;
958    int do_G = 0;    int do_G = 0;
959    int do_g = 0;    int do_g = 0;
960    int do_showinfo = showinfo;    int do_showinfo = showinfo;
961    int do_showrest = 0;    int do_showrest = 0;
962    int do_flip = 0;    int do_flip = 0;
963    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
964    
965    use_utf8 = 0;    use_utf8 = 0;
966    
# Line 969  while (!done) Line 1070  while (!done)
1070      }      }
1071    
1072    pp = p;    pp = p;
1073      poffset = p - buffer;
1074    
1075    for(;;)    for(;;)
1076      {      {
# Line 989  while (!done) Line 1091  while (!done)
1091      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1092      }      }
1093    
1094      /* The buffer may have moved while being extended; reset the start of data
1095      pointer to the correct relative point in the buffer. */
1096    
1097      p = buffer + poffset;
1098    
1099    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1100    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1101    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
# Line 1020  while (!done) Line 1127  while (!done)
1127    
1128        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1129        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1130          case 'B': do_debug = 1; break;
1131        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
1132        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1133        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
# Line 1037  while (!done) Line 1145  while (!done)
1145        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1146        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1147        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1148          case 'Z': debug_lengths = 0; break;
1149        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1150        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1151    
1152        case 'L':        case 'L':
1153        ppp = pp;        ppp = pp;
1154        /* The '\r' test here is so that it works on Windows */        /* The '\r' test here is so that it works on Windows. */
1155        while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;        /* The '0' test is just in case this is an unterminated line. */
1156          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1157        *ppp = 0;        *ppp = 0;
1158        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1159          {          {
1160          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1161          goto SKIP_DATA;          goto SKIP_DATA;
1162          }          }
1163          locale_set = 1;
1164        tables = pcre_maketables();        tables = pcre_maketables();
1165        pp = ppp;        pp = ppp;
1166        break;        break;
# Line 1116  while (!done) Line 1227  while (!done)
1227  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1228    
1229      {      {
1230      if (timeit)      if (timeit > 0)
1231        {        {
1232        register int i;        register int i;
1233        clock_t time_taken;        clock_t time_taken;
1234        clock_t start_time = clock();        clock_t start_time = clock();
1235        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1236          {          {
1237          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1238          if (re != NULL) free(re);          if (re != NULL) free(re);
1239          }          }
1240        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1241        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1242          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1243            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1244        }        }
1245    
# Line 1180  while (!done) Line 1291  while (!done)
1291    
1292      if (do_study)      if (do_study)
1293        {        {
1294        if (timeit)        if (timeit > 0)
1295          {          {
1296          register int i;          register int i;
1297          clock_t time_taken;          clock_t time_taken;
1298          clock_t start_time = clock();          clock_t start_time = clock();
1299          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
1300            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
1301          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1302          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
1303          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
1304            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeit) /
1305              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
1306          }          }
1307        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 1233  while (!done) Line 1344  while (!done)
1344    
1345      SHOW_INFO:      SHOW_INFO:
1346    
1347        if (do_debug)
1348          {
1349          fprintf(outfile, "------------------------------------------------------------------\n");
1350          pcre_printint(re, outfile, debug_lengths);
1351          }
1352    
1353      if (do_showinfo)      if (do_showinfo)
1354        {        {
1355        unsigned long int get_options, all_options;        unsigned long int get_options, all_options;
1356  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1357        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1358  #endif  #endif
1359        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char, okpartial, jchanged;
1360        int nameentrysize, namecount;        int nameentrysize, namecount;
1361        const uschar *nametable;        const uschar *nametable;
1362    
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         pcre_printint(re, outfile);  
         }  
   
1363        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1364        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1365        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
# Line 1258  while (!done) Line 1369  while (!done)
1369        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1370        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1371        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1372          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1373          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1374    
1375  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1376        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
# Line 1299  while (!done) Line 1412  while (!done)
1412            }            }
1413          }          }
1414    
1415        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
       to fish it out via out back door */  
1416    
1417        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1418        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
          }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
1419    
1420        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1421          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
# Line 1327  while (!done) Line 1433  while (!done)
1433            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1434            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1435    
1436        switch (get_options & PCRE_NEWLINE_CRLF)        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1437    
1438          switch (get_options & PCRE_NEWLINE_BITS)
1439          {          {
1440          case PCRE_NEWLINE_CR:          case PCRE_NEWLINE_CR:
1441          fprintf(outfile, "Forced newline sequence: CR\n");          fprintf(outfile, "Forced newline sequence: CR\n");
# Line 1341  while (!done) Line 1449  while (!done)
1449          fprintf(outfile, "Forced newline sequence: CRLF\n");          fprintf(outfile, "Forced newline sequence: CRLF\n");
1450          break;          break;
1451    
1452            case PCRE_NEWLINE_ANYCRLF:
1453            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1454            break;
1455    
1456            case PCRE_NEWLINE_ANY:
1457            fprintf(outfile, "Forced newline sequence: ANY\n");
1458            break;
1459    
1460          default:          default:
1461          break;          break;
1462          }          }
# Line 1358  while (!done) Line 1474  while (!done)
1474          int ch = first_char & 255;          int ch = first_char & 255;
1475          const char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1476            "" : " (caseless)";            "" : " (caseless)";
1477          if (isprint(ch))          if (PRINTHEX(ch))
1478            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1479          else          else
1480            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 1373  while (!done) Line 1489  while (!done)
1489          int ch = need_char & 255;          int ch = need_char & 255;
1490          const char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1491            "" : " (caseless)";            "" : " (caseless)";
1492          if (isprint(ch))          if (PRINTHEX(ch))
1493            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1494          else          else
1495            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
# Line 1409  while (!done) Line 1525  while (!done)
1525                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
1526                    c = 2;                    c = 2;
1527                    }                    }
1528                  if (isprint(i) && i != ' ')                  if (PRINTHEX(i) && i != ' ')
1529                    {                    {
1530                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
1531                    c += 2;                    c += 2;
# Line 1468  while (!done) Line 1584  while (!done)
1584                  strerror(errno));                  strerror(errno));
1585                }                }
1586              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
1587    
1588              }              }
1589            }            }
1590          fclose(f);          fclose(f);
# Line 1485  while (!done) Line 1602  while (!done)
1602    for (;;)    for (;;)
1603      {      {
1604      uschar *q;      uschar *q;
1605      uschar *bptr = dbuffer;      uschar *bptr;
1606      int *use_offsets = offsets;      int *use_offsets = offsets;
1607      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1608      int callout_data = 0;      int callout_data = 0;
# Line 1541  while (!done) Line 1658  while (!done)
1658      p = buffer;      p = buffer;
1659      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1660    
1661      q = dbuffer;      bptr = q = dbuffer;
1662      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1663        {        {
1664        int i = 0;        int i = 0;
# Line 1736  while (!done) Line 1853  while (!done)
1853            if (offsets == NULL)            if (offsets == NULL)
1854              {              {
1855              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1856                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1857              yield = 1;              yield = 1;
1858              goto EXIT;              goto EXIT;
1859              }              }
# Line 1866  while (!done) Line 1983  while (!done)
1983    
1984      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
1985        {        {
1986        if (timeit)        if (timeitm > 0)
1987          {          {
1988          register int i;          register int i;
1989          clock_t time_taken;          clock_t time_taken;
# Line 1876  while (!done) Line 1993  while (!done)
1993          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
1994            {            {
1995            int workspace[1000];            int workspace[1000];
1996            for (i = 0; i < LOOPREPEAT; i++)            for (i = 0; i < timeitm; i++)
1997              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1998                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
1999                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
# Line 1884  while (!done) Line 2001  while (!done)
2001          else          else
2002  #endif  #endif
2003    
2004          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeitm; i++)
2005            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2006              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2007    
2008          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2009          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2010            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
2011              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2012          }          }
2013    
# Line 1966  while (!done) Line 2083  while (!done)
2083    
2084        if (count >= 0)        if (count >= 0)
2085          {          {
2086          int i;          int i, maxcount;
2087    
2088    #if !defined NODFA
2089            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2090    #endif
2091              maxcount = use_size_offsets/3;
2092    
2093            /* This is a check against a lunatic return value. */
2094    
2095            if (count > maxcount)
2096              {
2097              fprintf(outfile,
2098                "** PCRE error: returned count %d is too big for offset size %d\n",
2099                count, use_size_offsets);
2100              count = use_size_offsets/3;
2101              if (do_g || do_G)
2102                {
2103                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2104                do_g = do_G = FALSE;        /* Break g/G loop */
2105                }
2106              }
2107    
2108          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2109            {            {
2110            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 2084  while (!done) Line 2222  while (!done)
2222          }          }
2223    
2224        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2225        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2226        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2227        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2228        offset values to achieve this. We won't be at the end of the string -  
2229        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any" or
2230          "anycrlf". If the previous match was at the end of a line terminated by
2231          CRLF, an advance of one character just passes the \r, whereas we should
2232          prefer the longer newline sequence, as does the code in pcre_exec().
2233          Fudge the offset value to achieve this.
2234    
2235          Otherwise, in the case of UTF-8 matching, the advance must be one
2236          character, not one byte. */
2237    
2238        else        else
2239          {          {
2240          if (g_notempty != 0)          if (g_notempty != 0)
2241            {            {
2242            int onechar = 1;            int onechar = 1;
2243              unsigned int obits = ((real_pcre *)re)->options;
2244            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2245            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2246                {
2247                int d;
2248                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2249                obits = (d == '\r')? PCRE_NEWLINE_CR :
2250                        (d == '\n')? PCRE_NEWLINE_LF :
2251                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2252                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2253                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2254                }
2255              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2256                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2257                  &&
2258                  start_offset < len - 1 &&
2259                  bptr[start_offset] == '\r' &&
2260                  bptr[start_offset+1] == '\n')
2261                onechar++;
2262              else if (use_utf8)
2263              {              {
2264              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2265                {                {
# Line 2131  while (!done) Line 2294  while (!done)
2294        character. */        character. */
2295    
2296        g_notempty = 0;        g_notempty = 0;
2297    
2298        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2299          {          {
2300          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
# Line 2165  while (!done) Line 2329  while (!done)
2329      {      {
2330      new_free((void *)tables);      new_free((void *)tables);
2331      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2332        locale_set = 0;
2333      }      }
2334    }    }
2335    

Legend:
Removed from v.91  
changed lines
  Added in v.199

  ViewVC Help
Powered by ViewVC 1.1.5