/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory | Revision Log | View Patch

revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC revision 146 by ph10, Thu Apr 5 09:17:28 2007 UTC
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 44  POSSIBILITY OF SUCH DAMAGE.
44  #include <locale.h>  #include <locale.h>
45  #include <errno.h>  #include <errno.h>
46    
47  #ifndef _WIN32  
48  #include <sys/resource.h>  /* A number of things vary for Windows builds. Originally, pcretest opened its
49    input and output without "b"; then I was told that "b" was needed in some
50    environments, so it was added for release 5.0 to both the input and output. (It
51    makes no difference on Unix-like systems.) Later I was told that it is wrong
52    for the input on Windows. I've now abstracted the modes into two macros that
53    are set here, to make it easier to fiddle with them, and removed "b" from the
54    input mode under Windows. */
55    
56    #if defined(_WIN32) || defined(WIN32)
57    #include <io.h>                /* For _setmode() */
58    #include <fcntl.h>             /* For _O_BINARY */
59    #define INPUT_MODE   "r"
60    #define OUTPUT_MODE  "wb"
61    
62    #else
63    #include <sys/time.h>          /* These two includes are needed */
64    #include <sys/resource.h>      /* for setrlimit(). */
65    #define INPUT_MODE   "rb"
66    #define OUTPUT_MODE  "wb"
67  #endif  #endif
68    
 #define PCRE_SPY        /* For Win32 build, import data, not export */  
69    
70  /* We include pcre_internal.h because we need the internal info for displaying  /* We have to include pcre_internal.h because we need the internal info for
71  the results of pcre_study() and we also need to know about the internal  displaying the results of pcre_study() and we also need to know about the
72  macros, structures, and other internal data values; pcretest has "inside  internal macros, structures, and other internal data values; pcretest has
73  information" compared to a program that strictly follows the PCRE API. */  "inside information" compared to a program that strictly follows the PCRE API.
74    
75    Although pcre_internal.h does itself include pcre.h, we explicitly include it
76    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
77    appropriately for an application, not for building PCRE. */
78    
79    #include "pcre.h"
80  #include "pcre_internal.h"  #include "pcre_internal.h"
81    
82  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to the data tables that PCRE uses. So as not to have to keep
# Line 74  symbols to prevent clashes. */ Line 96  symbols to prevent clashes. */
96    
97  /* We also need the pcre_printint() function for printing out compiled  /* We also need the pcre_printint() function for printing out compiled
98  patterns. This function is in a separate file so that it can be included in  patterns. This function is in a separate file so that it can be included in
99  pcre_compile.c when that module is compiled with debugging enabled. */  pcre_compile.c when that module is compiled with debugging enabled.
100    
101    The definition of the macro PRINTABLE, which determines whether to print an
102    output character as-is or as a hex value when showing compiled patterns, is
103    contained in this file. We use it here also, in cases when the locale has not
104    been explicitly changed, so as to get consistent output from systems that
105    differ in their output from isprint() even in the "C" locale. */
106    
107  #include "pcre_printint.src"  #include "pcre_printint.src"
108    
109    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
110    
111    
112  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
113  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 87  Makefile. */ Line 117  Makefile. */
117  #include "pcreposix.h"  #include "pcreposix.h"
118  #endif  #endif
119    
120  /* It is also possible, for the benefit of the version imported into Exim, to  /* It is also possible, for the benefit of the version currently imported into
121  build pcretest without support for UTF8 (define NOUTF8), without the interface  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
122  to the DFA matcher (NODFA), and without the doublecheck of the old "info"  interface to the DFA matcher (NODFA), and without the doublecheck of the old
123  function (define NOINFOCHECK). */  "info" function (define NOINFOCHECK). In fact, we automatically cut out the
124    UTF8 support if PCRE is built without it. */
125    
126    #ifndef SUPPORT_UTF8
127    #ifndef NOUTF8
128    #define NOUTF8
129    #endif
130    #endif
131    
132    
133  /* Other parameters */  /* Other parameters */
# Line 103  function (define NOINFOCHECK). */ Line 140  function (define NOINFOCHECK). */
140  #endif  #endif
141  #endif  #endif
142    
143    /* This is the default loop count for timing. */
144    
145  #define LOOPREPEAT 500000  #define LOOPREPEAT 500000
146    
147  /* Static variables */  /* Static variables */
# Line 114  static int callout_extra; Line 153  static int callout_extra;
153  static int callout_fail_count;  static int callout_fail_count;
154  static int callout_fail_id;  static int callout_fail_id;
155  static int first_callout;  static int first_callout;
156    static int locale_set = 0;
157  static int show_malloc;  static int show_malloc;
158  static int use_utf8;  static int use_utf8;
159  static size_t gotten_store;  static size_t gotten_store;
# Line 157  uschar *here = start; Line 197  uschar *here = start;
197  for (;;)  for (;;)
198    {    {
199    int rlen = buffer_size - (here - buffer);    int rlen = buffer_size - (here - buffer);
200    
201    if (rlen > 1000)    if (rlen > 1000)
202      {      {
203      int dlen;      int dlen;
# Line 213  return NULL;  /* Control never gets here Line 254  return NULL;  /* Control never gets here
254    
255  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
256  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
257  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
258    
259  Arguments:  Arguments:
260    str           string to be converted    str           string to be converted
# Line 311  Arguments: Line 352  Arguments:
352  Returns:     number of characters placed in the buffer  Returns:     number of characters placed in the buffer
353  */  */
354    
355    #if !defined NOUTF8
356    
357  static int  static int
358  ord2utf8(int cvalue, uschar *utf8bytes)  ord2utf8(int cvalue, uschar *utf8bytes)
359  {  {
# Line 327  for (j = i; j > 0; j--) Line 370  for (j = i; j > 0; j--)
370  return i + 1;  return i + 1;
371  }  }
372    
373    #endif
374    
375    
376    
377  /*************************************************  /*************************************************
# Line 353  while (length-- > 0) Line 398  while (length-- > 0)
398        {        {
399        length -= rc - 1;        length -= rc - 1;
400        p += rc;        p += rc;
401        if (c < 256 && isprint(c))        if (PRINTHEX(c))
402          {          {
403          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
404          yield++;          yield++;
405          }          }
406        else        else
407          {          {
408          int n;          int n = 4;
409          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
410          yield += n;          yield += (n <= 0x000000ff)? 2 :
411                     (n <= 0x00000fff)? 3 :
412                     (n <= 0x0000ffff)? 4 :
413                     (n <= 0x000fffff)? 5 : 6;
414          }          }
415        continue;        continue;
416        }        }
# Line 371  while (length-- > 0) Line 419  while (length-- > 0)
419    
420     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
421    
422    if (isprint(c = *(p++)))    c = *p++;
423      if (PRINTHEX(c))
424      {      {
425      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
426      yield++;      yield++;
# Line 614  return count; Line 663  return count;
663  *************************************************/  *************************************************/
664    
665  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes, where
666  xxx is LF, CR, or CRLF. Print a message and return 0 if there is no match.  xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.
667    
668  Arguments:  Arguments:
669    p           points after the leading '<'    p           points after the leading '<'
# Line 629  check_newline(uschar *p, FILE *f) Line 678  check_newline(uschar *p, FILE *f)
678  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
679  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
680  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
681    if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
682  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
683  return 0;  return 0;
684  }  }
# Line 636  return 0; Line 686  return 0;
686    
687    
688  /*************************************************  /*************************************************
689    *             Usage function                     *
690    *************************************************/
691    
692    static void
693    usage(void)
694    {
695    printf("Usage:     pcretest [options] [<input> [<output>]]\n");
696    printf("  -b       show compiled code (bytecode)\n");
697    printf("  -C       show PCRE compile-time options and exit\n");
698    printf("  -d       debug: show compiled code and information (-b and -i)\n");
699    #if !defined NODFA
700    printf("  -dfa     force DFA matching for all subjects\n");
701    #endif
702    printf("  -help    show usage information\n");
703    printf("  -i       show information about compiled patterns\n"
704           "  -m       output memory used information\n"
705           "  -o <n>   set size of offsets vector to <n>\n");
706    #if !defined NOPOSIX
707    printf("  -p       use POSIX interface\n");
708    #endif
709    printf("  -q       quiet: do not output PCRE version number at start\n");
710    printf("  -S <n>   set stack size to <n> megabytes\n");
711    printf("  -s       output store (memory) used information\n"
712           "  -t       time compilation and execution\n");
713    printf("  -t <n>   time compilation and execution, repeating <n> times\n");
714    printf("  -tm      time execution (matching) only\n");
715    printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
716    }
717    
718    
719    
720    /*************************************************
721  *                Main Program                    *  *                Main Program                    *
722  *************************************************/  *************************************************/
723    
# Line 650  int options = 0; Line 732  int options = 0;
732  int study_options = 0;  int study_options = 0;
733  int op = 1;  int op = 1;
734  int timeit = 0;  int timeit = 0;
735    int timeitm = 0;
736  int showinfo = 0;  int showinfo = 0;
737  int showstore = 0;  int showstore = 0;
738  int quiet = 0;  int quiet = 0;
# Line 681  buffer = (unsigned char *)malloc(buffer_ Line 764  buffer = (unsigned char *)malloc(buffer_
764  dbuffer = (unsigned char *)malloc(buffer_size);  dbuffer = (unsigned char *)malloc(buffer_size);
765  pbuffer = (unsigned char *)malloc(buffer_size);  pbuffer = (unsigned char *)malloc(buffer_size);
766    
767  /* The outfile variable is static so that new_malloc can use it. The _setmode()  /* The outfile variable is static so that new_malloc can use it. */
 stuff is some magic that I don't understand, but which apparently does good  
 things in Windows. It's related to line terminations.  */  
   
 #if defined(_WIN32) || defined(WIN32)  
 _setmode( _fileno( stdout ), 0x8000 );  
 #endif  /* defined(_WIN32) || defined(WIN32) */  
768    
769  outfile = stdout;  outfile = stdout;
770    
771    /* The following  _setmode() stuff is some Windows magic that tells its runtime
772    library to translate CRLF into a single LF character. At least, that's what
773    I've been told: never having used Windows I take this all on trust. Originally
774    it set 0x8000, but then I was advised that _O_BINARY was better. */
775    
776    #if defined(_WIN32) || defined(WIN32)
777    _setmode( _fileno( stdout ), _O_BINARY );
778    #endif
779    
780  /* Scan options */  /* Scan options */
781    
782  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
# Line 699  while (argc > 1 && argv[op][0] == '-') Line 785  while (argc > 1 && argv[op][0] == '-')
785    
786    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
787      showstore = 1;      showstore = 1;
   else if (strcmp(argv[op], "-t") == 0) timeit = 1;  
788    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
789      else if (strcmp(argv[op], "-b") == 0) debug = 1;
790    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
791    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
792  #if !defined NODFA  #if !defined NODFA
# Line 713  while (argc > 1 && argv[op][0] == '-') Line 799  while (argc > 1 && argv[op][0] == '-')
799      op++;      op++;
800      argc--;      argc--;
801      }      }
802      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
803        {
804        int both = argv[op][2] == 0;
805        int temp;
806        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
807                         *endptr == 0))
808          {
809          timeitm = temp;
810          op++;
811          argc--;
812          }
813        else timeitm = LOOPREPEAT;
814        if (both) timeit = timeitm;
815        }
816    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
817        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
818          *endptr == 0))          *endptr == 0))
819      {      {
820  #ifdef _WIN32  #if defined(_WIN32) || defined(WIN32)
821      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
822      exit(1);      exit(1);
823  #else  #else
# Line 749  while (argc > 1 && argv[op][0] == '-') Line 849  while (argc > 1 && argv[op][0] == '-')
849      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
850      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
851      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
852        (rc == '\n')? "LF" : "CRLF");        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
853          (rc == -1)? "ANY" : "???");
854      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
855      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
856      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
# Line 760  while (argc > 1 && argv[op][0] == '-') Line 861  while (argc > 1 && argv[op][0] == '-')
861      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %d\n", rc);
862      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
863      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
864      exit(0);      goto EXIT;
865        }
866      else if (strcmp(argv[op], "-help") == 0 ||
867               strcmp(argv[op], "--help") == 0)
868        {
869        usage();
870        goto EXIT;
871      }      }
872    else    else
873      {      {
874      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
875      printf("Usage:   pcretest [options] [<input> [<output>]]\n");      usage();
     printf("  -C     show PCRE compile-time options and exit\n");  
     printf("  -d     debug: show compiled code; implies -i\n");  
 #if !defined NODFA  
     printf("  -dfa   force DFA matching for all subjects\n");  
 #endif  
     printf("  -i     show information about compiled pattern\n"  
            "  -m     output memory used information\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -S <n> set stack size to <n> megabytes\n");  
     printf("  -s     output store (memory) used information\n"  
            "  -t     time compilation and execution\n");  
876      yield = 1;      yield = 1;
877      goto EXIT;      goto EXIT;
878      }      }
# Line 803  if (offsets == NULL) Line 896  if (offsets == NULL)
896    
897  if (argc > 1)  if (argc > 1)
898    {    {
899    infile = fopen(argv[op], "rb");    infile = fopen(argv[op], INPUT_MODE);
900    if (infile == NULL)    if (infile == NULL)
901      {      {
902      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
# Line 814  if (argc > 1) Line 907  if (argc > 1)
907    
908  if (argc > 2)  if (argc > 2)
909    {    {
910    outfile = fopen(argv[op+1], "wb");    outfile = fopen(argv[op+1], OUTPUT_MODE);
911    if (outfile == NULL)    if (outfile == NULL)
912      {      {
913      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
# Line 854  while (!done) Line 947  while (!done)
947    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
948    int do_study = 0;    int do_study = 0;
949    int do_debug = debug;    int do_debug = debug;
950      int debug_lengths = 1;
951    int do_G = 0;    int do_G = 0;
952    int do_g = 0;    int do_g = 0;
953    int do_showinfo = showinfo;    int do_showinfo = showinfo;
954    int do_showrest = 0;    int do_showrest = 0;
955    int do_flip = 0;    int do_flip = 0;
956    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
957    
958    use_utf8 = 0;    use_utf8 = 0;
959    
# Line 969  while (!done) Line 1063  while (!done)
1063      }      }
1064    
1065    pp = p;    pp = p;
1066      poffset = p - buffer;
1067    
1068    for(;;)    for(;;)
1069      {      {
# Line 989  while (!done) Line 1084  while (!done)
1084      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1085      }      }
1086    
1087      /* The buffer may have moved while being extended; reset the start of data
1088      pointer to the correct relative point in the buffer. */
1089    
1090      p = buffer + poffset;
1091    
1092    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1093    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1094    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
# Line 1020  while (!done) Line 1120  while (!done)
1120    
1121        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1122        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1123          case 'B': do_debug = 1; break;
1124        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
1125        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1126        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
# Line 1037  while (!done) Line 1138  while (!done)
1138        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1139        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1140        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1141          case 'Z': debug_lengths = 0; break;
1142        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1143        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1144    
1145        case 'L':        case 'L':
1146        ppp = pp;        ppp = pp;
1147        /* The '\r' test here is so that it works on Windows */        /* The '\r' test here is so that it works on Windows. */
1148        while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;        /* The '0' test is just in case this is an unterminated line. */
1149          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1150        *ppp = 0;        *ppp = 0;
1151        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1152          {          {
1153          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1154          goto SKIP_DATA;          goto SKIP_DATA;
1155          }          }
1156          locale_set = 1;
1157        tables = pcre_maketables();        tables = pcre_maketables();
1158        pp = ppp;        pp = ppp;
1159        break;        break;
# Line 1116  while (!done) Line 1220  while (!done)
1220  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1221    
1222      {      {
1223      if (timeit)      if (timeit > 0)
1224        {        {
1225        register int i;        register int i;
1226        clock_t time_taken;        clock_t time_taken;
1227        clock_t start_time = clock();        clock_t start_time = clock();
1228        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1229          {          {
1230          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1231          if (re != NULL) free(re);          if (re != NULL) free(re);
1232          }          }
1233        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1234        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1235          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1236            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1237        }        }
1238    
# Line 1180  while (!done) Line 1284  while (!done)
1284    
1285      if (do_study)      if (do_study)
1286        {        {
1287        if (timeit)        if (timeit > 0)
1288          {          {
1289          register int i;          register int i;
1290          clock_t time_taken;          clock_t time_taken;
1291          clock_t start_time = clock();          clock_t start_time = clock();
1292          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
1293            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
1294          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1295          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
1296          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
1297            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeit) /
1298              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
1299          }          }
1300        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 1233  while (!done) Line 1337  while (!done)
1337    
1338      SHOW_INFO:      SHOW_INFO:
1339    
1340        if (do_debug)
1341          {
1342          fprintf(outfile, "------------------------------------------------------------------\n");
1343          pcre_printint(re, outfile, debug_lengths);
1344          }
1345    
1346      if (do_showinfo)      if (do_showinfo)
1347        {        {
1348        unsigned long int get_options, all_options;        unsigned long int get_options, all_options;
# Line 1243  while (!done) Line 1353  while (!done)
1353        int nameentrysize, namecount;        int nameentrysize, namecount;
1354        const uschar *nametable;        const uschar *nametable;
1355    
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         pcre_printint(re, outfile);  
         }  
   
1356        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1357        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1358        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
# Line 1327  while (!done) Line 1431  while (!done)
1431            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1432            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1433    
1434        switch (get_options & PCRE_NEWLINE_CRLF)        switch (get_options & PCRE_NEWLINE_BITS)
1435          {          {
1436          case PCRE_NEWLINE_CR:          case PCRE_NEWLINE_CR:
1437          fprintf(outfile, "Forced newline sequence: CR\n");          fprintf(outfile, "Forced newline sequence: CR\n");
# Line 1341  while (!done) Line 1445  while (!done)
1445          fprintf(outfile, "Forced newline sequence: CRLF\n");          fprintf(outfile, "Forced newline sequence: CRLF\n");
1446          break;          break;
1447    
1448            case PCRE_NEWLINE_ANY:
1449            fprintf(outfile, "Forced newline sequence: ANY\n");
1450            break;
1451    
1452          default:          default:
1453          break;          break;
1454          }          }
# Line 1358  while (!done) Line 1466  while (!done)
1466          int ch = first_char & 255;          int ch = first_char & 255;
1467          const char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1468            "" : " (caseless)";            "" : " (caseless)";
1469          if (isprint(ch))          if (PRINTHEX(ch))
1470            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1471          else          else
1472            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 1373  while (!done) Line 1481  while (!done)
1481          int ch = need_char & 255;          int ch = need_char & 255;
1482          const char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1483            "" : " (caseless)";            "" : " (caseless)";
1484          if (isprint(ch))          if (PRINTHEX(ch))
1485            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1486          else          else
1487            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
# Line 1409  while (!done) Line 1517  while (!done)
1517                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
1518                    c = 2;                    c = 2;
1519                    }                    }
1520                  if (isprint(i) && i != ' ')                  if (PRINTHEX(i) && i != ' ')
1521                    {                    {
1522                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
1523                    c += 2;                    c += 2;
# Line 1468  while (!done) Line 1576  while (!done)
1576                  strerror(errno));                  strerror(errno));
1577                }                }
1578              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
1579    
1580              }              }
1581            }            }
1582          fclose(f);          fclose(f);
# Line 1866  while (!done) Line 1975  while (!done)
1975    
1976      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
1977        {        {
1978        if (timeit)        if (timeitm > 0)
1979          {          {
1980          register int i;          register int i;
1981          clock_t time_taken;          clock_t time_taken;
# Line 1876  while (!done) Line 1985  while (!done)
1985          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
1986            {            {
1987            int workspace[1000];            int workspace[1000];
1988            for (i = 0; i < LOOPREPEAT; i++)            for (i = 0; i < timeitm; i++)
1989              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1990                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
1991                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
# Line 1884  while (!done) Line 1993  while (!done)
1993          else          else
1994  #endif  #endif
1995    
1996          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeitm; i++)
1997            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1998              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1999    
2000          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2001          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2002            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
2003              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2004          }          }
2005    
# Line 1966  while (!done) Line 2075  while (!done)
2075    
2076        if (count >= 0)        if (count >= 0)
2077          {          {
2078          int i;          int i, maxcount;
2079    
2080    #if !defined NODFA
2081            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2082    #endif
2083              maxcount = use_size_offsets/3;
2084    
2085            /* This is a check against a lunatic return value. */
2086    
2087            if (count > maxcount)
2088              {
2089              fprintf(outfile,
2090                "** PCRE error: returned count %d is too big for offset size %d\n",
2091                count, use_size_offsets);
2092              count = use_size_offsets/3;
2093              if (do_g || do_G)
2094                {
2095                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2096                do_g = do_G = FALSE;        /* Break g/G loop */
2097                }
2098              }
2099    
2100          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2101            {            {
2102            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 2084  while (!done) Line 2214  while (!done)
2214          }          }
2215    
2216        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2217        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2218        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2219        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2220        offset values to achieve this. We won't be at the end of the string -  
2221        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any".
2222          If the previous match was at the end of a line terminated by CRLF, an
2223          advance of one character just passes the \r, whereas we should prefer the
2224          longer newline sequence, as does the code in pcre_exec(). Fudge the
2225          offset value to achieve this.
2226    
2227          Otherwise, in the case of UTF-8 matching, the advance must be one
2228          character, not one byte. */
2229    
2230        else        else
2231          {          {
2232          if (g_notempty != 0)          if (g_notempty != 0)
2233            {            {
2234            int onechar = 1;            int onechar = 1;
2235              unsigned int obits = ((real_pcre *)re)->options;
2236            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2237            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2238                {
2239                int d;
2240                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2241                obits = (d == '\r')? PCRE_NEWLINE_CR :
2242                        (d == '\n')? PCRE_NEWLINE_LF :
2243                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2244                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2245                }
2246              if ((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY &&
2247                  start_offset < len - 1 &&
2248                  bptr[start_offset] == '\r' &&
2249                  bptr[start_offset+1] == '\n')
2250                onechar++;
2251              else if (use_utf8)
2252              {              {
2253              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2254                {                {
# Line 2131  while (!done) Line 2283  while (!done)
2283        character. */        character. */
2284    
2285        g_notempty = 0;        g_notempty = 0;
2286    
2287        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2288          {          {
2289          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
# Line 2165  while (!done) Line 2318  while (!done)
2318      {      {
2319      new_free((void *)tables);      new_free((void *)tables);
2320      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2321        locale_set = 0;
2322      }      }
2323    }    }
2324    

Legend:
Removed from v.91  
changed lines
  Added in v.146

  ViewVC Help
Powered by ViewVC 1.1.5