/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 73 by nigel, Sat Feb 24 21:40:30 2007 UTC revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather untidy in places. */  been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39  #include <ctype.h>  #include <ctype.h>
40  #include <stdio.h>  #include <stdio.h>
# Line 12  been extended and consequently is now ra Line 42  been extended and consequently is now ra
42  #include <stdlib.h>  #include <stdlib.h>
43  #include <time.h>  #include <time.h>
44  #include <locale.h>  #include <locale.h>
45    #include <errno.h>
 /* We need the internal info for displaying the results of pcre_study(). Also  
 for getting the opcodes for showing compiled code. */  
46    
47  #define PCRE_SPY        /* For Win32 build, import data, not export */  #define PCRE_SPY        /* For Win32 build, import data, not export */
48  #include "internal.h"  
49    /* We include pcre_internal.h because we need the internal info for displaying
50    the results of pcre_study() and we also need to know about the internal
51    macros, structures, and other internal data values; pcretest has "inside
52    information" compared to a program that strictly follows the PCRE API. */
53    
54    #include "pcre_internal.h"
55    
56    /* We need access to the data tables that PCRE uses. So as not to have to keep
57    two copies, we include the source file here, changing the names of the external
58    symbols to prevent clashes. */
59    
60    #define _pcre_utf8_table1      utf8_table1
61    #define _pcre_utf8_table1_size utf8_table1_size
62    #define _pcre_utf8_table2      utf8_table2
63    #define _pcre_utf8_table3      utf8_table3
64    #define _pcre_utf8_table4      utf8_table4
65    #define _pcre_utt              utt
66    #define _pcre_utt_size         utt_size
67    #define _pcre_OP_lengths       OP_lengths
68    
69    #include "pcre_tables.c"
70    
71    /* We also need the pcre_printint() function for printing out compiled
72    patterns. This function is in a separate file so that it can be included in
73    pcre_compile.c when that module is compiled with debugging enabled. */
74    
75    #include "pcre_printint.src"
76    
77    
78  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
79  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 27  Makefile. */ Line 83  Makefile. */
83  #include "pcreposix.h"  #include "pcreposix.h"
84  #endif  #endif
85    
86    /* It is also possible, for the benefit of the version imported into Exim, to
87    build pcretest without support for UTF8 (define NOUTF8), without the interface
88    to the DFA matcher (NODFA), and without the doublecheck of the old "info"
89    function (define NOINFOCHECK). */
90    
91    
92    /* Other parameters */
93    
94  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
95  #ifdef CLK_TCK  #ifdef CLK_TCK
96  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 35  Makefile. */ Line 99  Makefile. */
99  #endif  #endif
100  #endif  #endif
101    
102  #define LOOPREPEAT 50000  #define LOOPREPEAT 500000
103    
104  #define BUFFER_SIZE 30000  #define BUFFER_SIZE 30000
105    #define PBUFFER_SIZE BUFFER_SIZE
106  #define DBUFFER_SIZE BUFFER_SIZE  #define DBUFFER_SIZE BUFFER_SIZE
107    
108    
109    /* Static variables */
110    
111  static FILE *outfile;  static FILE *outfile;
112  static int log_store = 0;  static int log_store = 0;
113  static int callout_count;  static int callout_count;
# Line 52  static int show_malloc; Line 119  static int show_malloc;
119  static int use_utf8;  static int use_utf8;
120  static size_t gotten_store;  static size_t gotten_store;
121    
122    static uschar *pbuffer = NULL;
 static const int utf8_table1[] = {  
   0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  
   
 static const int utf8_table2[] = {  
   0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  
   
 static const int utf8_table3[] = {  
   0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  
   
   
   
 /*************************************************  
 *         Print compiled regex                   *  
 *************************************************/  
   
 /* The code for doing this is held in a separate file that is also included in  
 pcre.c when it is compiled with the debug switch. It defines a function called  
 print_internals(), which uses a table of opcode lengths defined by the macro  
 OP_LENGTHS, whose name must be OP_lengths. */  
   
 static uschar OP_lengths[] = { OP_LENGTHS };  
   
 #include "printint.c"  
123    
124    
125    
# Line 106  return(result); Line 150  return(result);
150    
151    
152    
 /*************************************************  
 *       Convert character value to UTF-8         *  
 *************************************************/  
   
 /* This function takes an integer value in the range 0 - 0x7fffffff  
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   buffer     pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
 */  
   
 static int  
 ord2utf8(int cvalue, unsigned char *buffer)  
 {  
 register int i, j;  
 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  
   if (cvalue <= utf8_table1[i]) break;  
 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  
 if (cvalue < 0) return -1;  
   
 buffer += i;  
 for (j = i; j > 0; j--)  
  {  
  *buffer-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *buffer = utf8_table2[i] | cvalue;  
 return i + 1;  
 }  
   
153    
154  /*************************************************  /*************************************************
155  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
# Line 158  Returns:   >  0 => the number of bytes c Line 166  Returns:   >  0 => the number of bytes c
166             -6 to 0 => malformed UTF-8 character at offset = (-return)             -6 to 0 => malformed UTF-8 character at offset = (-return)
167  */  */
168    
169    #if !defined NOUTF8
170    
171  static int  static int
172  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *buffer, int *vptr)
173  {  {
# Line 189  for (j = 0; j < i; j++) Line 199  for (j = 0; j < i; j++)
199    
200  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
201    
202  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < utf8_table1_size; j++)
203    if (d <= utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
204  if (j != i) return -(i+1);  if (j != i) return -(i+1);
205    
# Line 199  if (j != i) return -(i+1); Line 209  if (j != i) return -(i+1);
209  return i+1;  return i+1;
210  }  }
211    
212    #endif
213    
214    
215    
216    /*************************************************
217    *       Convert character value to UTF-8         *
218    *************************************************/
219    
220    /* This function takes an integer value in the range 0 - 0x7fffffff
221    and encodes it as a UTF-8 character in 1 to 6 bytes.
222    
223    Arguments:
224      cvalue     the character value
225      buffer     pointer to buffer for result - at least 6 bytes long
226    
227    Returns:     number of characters placed in the buffer
228    */
229    
230    static int
231    ord2utf8(int cvalue, uschar *buffer)
232    {
233    register int i, j;
234    for (i = 0; i < utf8_table1_size; i++)
235      if (cvalue <= utf8_table1[i]) break;
236    buffer += i;
237    for (j = i; j > 0; j--)
238     {
239     *buffer-- = 0x80 | (cvalue & 0x3f);
240     cvalue >>= 6;
241     }
242    *buffer = utf8_table2[i] | cvalue;
243    return i + 1;
244    }
245    
246    
247    
248  /*************************************************  /*************************************************
# Line 211  chars without printing. */ Line 255  chars without printing. */
255    
256  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(unsigned char *p, int length, FILE *f)
257  {  {
258  int c;  int c = 0;
259  int yield = 0;  int yield = 0;
260    
261  while (length-- > 0)  while (length-- > 0)
262    {    {
263    #if !defined NOUTF8
264    if (use_utf8)    if (use_utf8)
265      {      {
266      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
# Line 238  while (length-- > 0) Line 283  while (length-- > 0)
283        continue;        continue;
284        }        }
285      }      }
286    #endif
287    
288     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
289    
# Line 269  data is not zero. */ Line 315  data is not zero. */
315  static int callout(pcre_callout_block *cb)  static int callout(pcre_callout_block *cb)
316  {  {
317  FILE *f = (first_callout | callout_extra)? outfile : NULL;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
318  int i, pre_start, post_start;  int i, pre_start, post_start, subject_length;
319    
320  if (callout_extra)  if (callout_extra)
321    {    {
# Line 300  pre_start = pchars((unsigned char *)cb-> Line 346  pre_start = pchars((unsigned char *)cb->
346  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
347    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
348    
349    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
350    
351  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  (void)pchars((unsigned char *)(cb->subject + cb->current_position),
352    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
353    
354  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
355    
356  /* Always print appropriate indicators, with callout number if not already  /* Always print appropriate indicators, with callout number if not already
357  shown */  shown. For automatic callouts, show the pattern offset. */
358    
359  if (callout_extra) fprintf(outfile, "    ");  if (cb->callout_number == 255)
360    else fprintf(outfile, "%3d ", cb->callout_number);    {
361      fprintf(outfile, "%+3d ", cb->pattern_position);
362      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
363      }
364    else
365      {
366      if (callout_extra) fprintf(outfile, "    ");
367        else fprintf(outfile, "%3d ", cb->callout_number);
368      }
369    
370  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
371  fprintf(outfile, "^");  fprintf(outfile, "^");
# Line 320  if (post_start > 0) Line 376  if (post_start > 0)
376    fprintf(outfile, "^");    fprintf(outfile, "^");
377    }    }
378    
379    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
380      fprintf(outfile, " ");
381    
382    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
383      pbuffer + cb->pattern_position);
384    
385  fprintf(outfile, "\n");  fprintf(outfile, "\n");
386  first_callout = 0;  first_callout = 0;
387    
# Line 350  static void *new_malloc(size_t size) Line 412  static void *new_malloc(size_t size)
412  void *block = malloc(size);  void *block = malloc(size);
413  gotten_store = size;  gotten_store = size;
414  if (show_malloc)  if (show_malloc)
415    fprintf(outfile, "malloc       %3d %p\n", size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
416  return block;  return block;
417  }  }
418    
# Line 368  static void *stack_malloc(size_t size) Line 430  static void *stack_malloc(size_t size)
430  {  {
431  void *block = malloc(size);  void *block = malloc(size);
432  if (show_malloc)  if (show_malloc)
433    fprintf(outfile, "stack_malloc %3d %p\n", size, block);    fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
434  return block;  return block;
435  }  }
436    
# Line 396  if ((rc = pcre_fullinfo(re, study, optio Line 458  if ((rc = pcre_fullinfo(re, study, optio
458    
459    
460  /*************************************************  /*************************************************
461    *         Byte flipping function                 *
462    *************************************************/
463    
/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value      the value to flip; only its low-order 2 or 4 bytes are used
  n          2 to swap the two low-order bytes; any other value causes the
             four low-order bytes to be reversed

Returns:     the byte-reversed value, with all higher-order bits zero

The masking and shifting is done on an unsigned long so that no bit is ever
shifted into a sign bit; with a signed 32-bit long that would be undefined
behaviour. */

static long int
byteflip(long int value, int n)
{
unsigned long int v = (unsigned long int)value;

if (n == 2)
  return (long int)(((v & 0x00ffUL) << 8) | ((v & 0xff00UL) >> 8));

return (long int)(((v & 0x000000ffUL) << 24) |
                  ((v & 0x0000ff00UL) <<  8) |
                  ((v & 0x00ff0000UL) >>  8) |
                  ((v & 0xff000000UL) >> 24));
}
473    
474    
475    
476    
477    /*************************************************
478    *        Check match or recursion limit          *
479    *************************************************/
480    
481    static int
482    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
483      int start_offset, int options, int *use_offsets, int use_size_offsets,
484      int flag, unsigned long int *limit, int errnumber, const char *msg)
485    {
486    int count;
487    int min = 0;
488    int mid = 64;
489    int max = -1;
490    
491    extra->flags |= flag;
492    
493    for (;;)
494      {
495      *limit = mid;
496    
497      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
498        use_offsets, use_size_offsets);
499    
500      if (count == errnumber)
501        {
502        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
503        min = mid;
504        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
505        }
506    
507      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
508                             count == PCRE_ERROR_PARTIAL)
509        {
510        if (mid == min + 1)
511          {
512          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
513          break;
514          }
515        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
516        max = mid;
517        mid = (min + mid)/2;
518        }
519      else break;    /* Some other error */
520      }
521    
522    extra->flags &= ~flag;
523    return count;
524    }
525    
526    
527    
528    /*************************************************
529  *                Main Program                    *  *                Main Program                    *
530  *************************************************/  *************************************************/
531    
# Line 412  int op = 1; Line 542  int op = 1;
542  int timeit = 0;  int timeit = 0;
543  int showinfo = 0;  int showinfo = 0;
544  int showstore = 0;  int showstore = 0;
545    int quiet = 0;
546  int size_offsets = 45;  int size_offsets = 45;
547  int size_offsets_max;  int size_offsets_max;
548  int *offsets;  int *offsets = NULL;
549  #if !defined NOPOSIX  #if !defined NOPOSIX
550  int posix = 0;  int posix = 0;
551  #endif  #endif
552  int debug = 0;  int debug = 0;
553  int done = 0;  int done = 0;
554    int all_use_dfa = 0;
555    int yield = 0;
556    
557  unsigned char *buffer;  unsigned char *buffer;
558  unsigned char *dbuffer;  unsigned char *dbuffer;
# Line 429  when I am debugging. */ Line 562  when I am debugging. */
562    
563  buffer = (unsigned char *)malloc(BUFFER_SIZE);  buffer = (unsigned char *)malloc(BUFFER_SIZE);
564  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
565    pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
566    
567  /* Static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. The _setmode()
568    stuff is some magic that I don't understand, but which apparently does good
569    things in Windows. It's related to line terminations.  */
570    
571    #if defined(_WIN32) || defined(WIN32)
572    _setmode( _fileno( stdout ), 0x8000 );
573    #endif  /* defined(_WIN32) || defined(WIN32) */
574    
575  outfile = stdout;  outfile = stdout;
576    
# Line 443  while (argc > 1 && argv[op][0] == '-') Line 583  while (argc > 1 && argv[op][0] == '-')
583    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
584      showstore = 1;      showstore = 1;
585    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
586      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
587    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
588    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
589    #if !defined NODFA
590      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
591    #endif
592    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
593        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
594          *endptr == 0))          *endptr == 0))
# Line 462  while (argc > 1 && argv[op][0] == '-') Line 606  while (argc > 1 && argv[op][0] == '-')
606      printf("Compiled with\n");      printf("Compiled with\n");
607      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
608      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
609        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
610        printf("  %sUnicode properties support\n", rc? "" : "No ");
611      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
612      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
613      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
# Line 470  while (argc > 1 && argv[op][0] == '-') Line 616  while (argc > 1 && argv[op][0] == '-')
616      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
617      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
618      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %d\n", rc);
619        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
620        printf("  Default recursion depth limit = %d\n", rc);
621      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
622      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
623      exit(0);      exit(0);
# Line 479  while (argc > 1 && argv[op][0] == '-') Line 627  while (argc > 1 && argv[op][0] == '-')
627      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
628      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
629      printf("  -C     show PCRE compile-time options and exit\n");      printf("  -C     show PCRE compile-time options and exit\n");
630      printf("  -d     debug: show compiled code; implies -i\n"      printf("  -d     debug: show compiled code; implies -i\n");
631             "  -i     show information about compiled pattern\n"  #if !defined NODFA
632        printf("  -dfa   force DFA matching for all subjects\n");
633    #endif
634        printf("  -i     show information about compiled pattern\n"
635               "  -m     output memory used information\n"
636             "  -o <n> set size of offsets vector to <n>\n");             "  -o <n> set size of offsets vector to <n>\n");
637  #if !defined NOPOSIX  #if !defined NOPOSIX
638      printf("  -p     use POSIX interface\n");      printf("  -p     use POSIX interface\n");
639  #endif  #endif
640      printf("  -s     output store information\n"      printf("  -s     output store (memory) used information\n"
641             "  -t     time compilation and execution\n");             "  -t     time compilation and execution\n");
642      return 1;      yield = 1;
643        goto EXIT;
644      }      }
645    op++;    op++;
646    argc--;    argc--;
# Line 501  if (offsets == NULL) Line 654  if (offsets == NULL)
654    {    {
655    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
656      size_offsets_max * sizeof(int));      size_offsets_max * sizeof(int));
657    return 1;    yield = 1;
658      goto EXIT;
659    }    }
660    
661  /* Sort out the input and output files */  /* Sort out the input and output files */
662    
663  if (argc > 1)  if (argc > 1)
664    {    {
665    infile = fopen(argv[op], "r");    infile = fopen(argv[op], "rb");
666    if (infile == NULL)    if (infile == NULL)
667      {      {
668      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
669      return 1;      yield = 1;
670        goto EXIT;
671      }      }
672    }    }
673    
674  if (argc > 2)  if (argc > 2)
675    {    {
676    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], "wb");
677    if (outfile == NULL)    if (outfile == NULL)
678      {      {
679      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
680      return 1;      yield = 1;
681        goto EXIT;
682      }      }
683    }    }
684    
# Line 533  pcre_free = new_free; Line 689  pcre_free = new_free;
689  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
690  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
691    
692  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
693    
694  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
695    
696  /* Main loop */  /* Main loop */
697    
# Line 551  while (!done) Line 707  while (!done)
707    
708    const char *error;    const char *error;
709    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
710      unsigned char *to_file = NULL;
711    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
712      unsigned long int true_size, true_study_size = 0;
713      size_t size, regex_gotten_store;
714    int do_study = 0;    int do_study = 0;
715    int do_debug = debug;    int do_debug = debug;
716    int do_G = 0;    int do_G = 0;
717    int do_g = 0;    int do_g = 0;
718    int do_showinfo = showinfo;    int do_showinfo = showinfo;
719    int do_showrest = 0;    int do_showrest = 0;
720      int do_flip = 0;
721    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
722    
723    use_utf8 = 0;    use_utf8 = 0;
# Line 571  while (!done) Line 731  while (!done)
731    while (isspace(*p)) p++;    while (isspace(*p)) p++;
732    if (*p == 0) continue;    if (*p == 0) continue;
733    
734    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
735    complete, read more. */  
736      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
737        {
738        unsigned long int magic;
739        uschar sbuf[8];
740        FILE *f;
741    
742        p++;
743        pp = p + (int)strlen((char *)p);
744        while (isspace(pp[-1])) pp--;
745        *pp = 0;
746    
747        f = fopen((char *)p, "rb");
748        if (f == NULL)
749          {
750          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
751          continue;
752          }
753    
754        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
755    
756        true_size =
757          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
758        true_study_size =
759          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
760    
761        re = (real_pcre *)new_malloc(true_size);
762        regex_gotten_store = gotten_store;
763    
764        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
765    
766        magic = ((real_pcre *)re)->magic_number;
767        if (magic != MAGIC_NUMBER)
768          {
769          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
770            {
771            do_flip = 1;
772            }
773          else
774            {
775            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
776            fclose(f);
777            continue;
778            }
779          }
780    
781        fprintf(outfile, "Compiled regex%s loaded from %s\n",
782          do_flip? " (byte-inverted)" : "", p);
783    
784        /* Need to know if UTF-8 for printing data strings */
785    
786        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
787        use_utf8 = (options & PCRE_UTF8) != 0;
788    
789        /* Now see if there is any following study data */
790    
791        if (true_study_size != 0)
792          {
793          pcre_study_data *psd;
794    
795          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
796          extra->flags = PCRE_EXTRA_STUDY_DATA;
797    
798          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
799          extra->study_data = psd;
800    
801          if (fread(psd, 1, true_study_size, f) != true_study_size)
802            {
803            FAIL_READ:
804            fprintf(outfile, "Failed to read data from %s\n", p);
805            if (extra != NULL) new_free(extra);
806            if (re != NULL) new_free(re);
807            fclose(f);
808            continue;
809            }
810          fprintf(outfile, "Study data loaded from %s\n", p);
811          do_study = 1;     /* To get the data output if requested */
812          }
813        else fprintf(outfile, "No study data\n");
814    
815        fclose(f);
816        goto SHOW_INFO;
817        }
818    
819      /* In-line pattern (the usual case). Get the delimiter and seek the end of
820      the pattern; if it isn't complete, read more. */
821    
822    delimiter = *p++;    delimiter = *p++;
823    
# Line 617  while (!done) Line 862  while (!done)
862    
863    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
864    
865    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
866      for callouts. */
867    
868    *pp++ = 0;    *pp++ = 0;
869      strcpy((char *)pbuffer, (char *)p);
870    
871    /* Look for options after final delimiter */    /* Look for options after final delimiter */
872    
# Line 631  while (!done) Line 878  while (!done)
878      {      {
879      switch (*pp++)      switch (*pp++)
880        {        {
881          case 'f': options |= PCRE_FIRSTLINE; break;
882        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
883        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
884        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 639  while (!done) Line 887  while (!done)
887    
888        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
889        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
890          case 'C': options |= PCRE_AUTO_CALLOUT; break;
891        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
892        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
893          case 'F': do_flip = 1; break;
894        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
895        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
896        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
# Line 658  while (!done) Line 908  while (!done)
908    
909        case 'L':        case 'L':
910        ppp = pp;        ppp = pp;
911        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows */
912          while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
913        *ppp = 0;        *ppp = 0;
914        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
915          {          {
# Line 669  while (!done) Line 920  while (!done)
920        pp = ppp;        pp = ppp;
921        break;        break;
922    
923        case '\n': case ' ': break;        case '>':
924          to_file = pp;
925          while (*pp != 0) pp++;
926          while (isspace(pp[-1])) pp--;
927          *pp = 0;
928          break;
929    
930          case '\r':                      /* So that it works in Windows */
931          case '\n':
932          case ' ':
933          break;
934    
935        default:        default:
936        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
937        goto SKIP_DATA;        goto SKIP_DATA;
# Line 685  while (!done) Line 947  while (!done)
947      {      {
948      int rc;      int rc;
949      int cflags = 0;      int cflags = 0;
950    
951      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
952      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
953        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
954        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
955        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
956    
957      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
958    
959      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 759  while (!done) Line 1026  while (!done)
1026                sizeof(real_pcre) -                sizeof(real_pcre) -
1027                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1028    
1029        /* Extract the size for possible writing before possibly flipping it,
1030        and remember the store that was got. */
1031    
1032        true_size = ((real_pcre *)re)->size;
1033        regex_gotten_store = gotten_store;
1034    
1035        /* If /S was present, study the regexp to generate additional info to
1036        help with the matching. */
1037    
1038        if (do_study)
1039          {
1040          if (timeit)
1041            {
1042            register int i;
1043            clock_t time_taken;
1044            clock_t start_time = clock();
1045            for (i = 0; i < LOOPREPEAT; i++)
1046              extra = pcre_study(re, study_options, &error);
1047            time_taken = clock() - start_time;
1048            if (extra != NULL) free(extra);
1049            fprintf(outfile, "  Study time %.3f milliseconds\n",
1050              (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1051                (double)CLOCKS_PER_SEC);
1052            }
1053          extra = pcre_study(re, study_options, &error);
1054          if (error != NULL)
1055            fprintf(outfile, "Failed to study: %s\n", error);
1056          else if (extra != NULL)
1057            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1058          }
1059    
1060        /* If the 'F' option was present, we flip the bytes of all the integer
1061        fields in the regex data block and the study block. This is to make it
1062        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1063        compiled on a different architecture. */
1064    
1065        if (do_flip)
1066          {
1067          real_pcre *rre = (real_pcre *)re;
1068          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1069          rre->size = byteflip(rre->size, sizeof(rre->size));
1070          rre->options = byteflip(rre->options, sizeof(rre->options));
1071          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1072          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1073          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1074          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1075          rre->name_table_offset = byteflip(rre->name_table_offset,
1076            sizeof(rre->name_table_offset));
1077          rre->name_entry_size = byteflip(rre->name_entry_size,
1078            sizeof(rre->name_entry_size));
1079          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1080    
1081          if (extra != NULL)
1082            {
1083            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1084            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1085            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1086            }
1087          }
1088    
1089        /* Extract information from the compiled data if required */
1090    
1091        SHOW_INFO:
1092    
1093      if (do_showinfo)      if (do_showinfo)
1094        {        {
1095        unsigned long int get_options;        unsigned long int get_options, all_options;
1096    #if !defined NOINFOCHECK
1097        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1098    #endif
1099        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char;
1100        int nameentrysize, namecount;        int nameentrysize, namecount;
1101        const uschar *nametable;        const uschar *nametable;
       size_t size;  
1102    
1103        if (do_debug)        if (do_debug)
1104          {          {
1105          fprintf(outfile, "------------------------------------------------------------------\n");          fprintf(outfile, "------------------------------------------------------------------\n");
1106          print_internals(re, outfile);          pcre_printint(re, outfile);
1107          }          }
1108    
1109        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
# Line 784  while (!done) Line 1116  while (!done)
1116        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1117        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1118    
1119    #if !defined NOINFOCHECK
1120        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1121        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1122          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 801  while (!done) Line 1134  while (!done)
1134            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1135              get_options, old_options);              get_options, old_options);
1136          }          }
1137    #endif
1138    
1139        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1140          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1141          size, gotten_store);          (int)size, (int)regex_gotten_store);
1142    
1143        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1144        if (backrefmax > 0)        if (backrefmax > 0)
# Line 822  while (!done) Line 1156  while (!done)
1156            }            }
1157          }          }
1158    
1159          /* The NOPARTIAL bit is a private bit in the options, so we have
1160          to fish it out via out back door */
1161    
1162          all_options = ((real_pcre *)re)->options;
1163          if (do_flip)
1164            {
1165            all_options = byteflip(all_options, sizeof(all_options));
1166            }
1167    
1168          if ((all_options & PCRE_NOPARTIAL) != 0)
1169            fprintf(outfile, "Partial matching not supported\n");
1170    
1171        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1172          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s\n",
1173            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1174            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1175            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1176            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1177              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1178            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1179            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1180            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1181            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1182              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1183            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1184            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1185    
# Line 871  while (!done) Line 1219  while (!done)
1219          else          else
1220            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1221          }          }
       }  
   
     /* If /S was present, study the regexp to generate additional info to  
     help with the matching. */  
   
     if (do_study)  
       {  
       if (timeit)  
         {  
         register int i;  
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < LOOPREPEAT; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.3f milliseconds\n",  
           (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /  
             (double)CLOCKS_PER_SEC);  
         }  
   
       extra = pcre_study(re, study_options, &error);  
       if (error != NULL)  
         fprintf(outfile, "Failed to study: %s\n", error);  
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
1222    
1223        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
1224        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
1225        so messes up the test suite. */        so messes up the test suite. (And with the /F option, it might be
1226          flipped.) */
1227    
1228        else if (do_showinfo)        if (do_study)
1229          {          {
1230          size_t size;          if (extra == NULL)
1231          uschar *start_bits = NULL;            fprintf(outfile, "Study returned NULL\n");
         new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);  
         new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);  
         /* fprintf(outfile, "Study size = %d\n", size); */  
         if (start_bits == NULL)  
           fprintf(outfile, "No starting character set\n");  
1232          else          else
1233            {            {
1234            int i;            uschar *start_bits = NULL;
1235            int c = 24;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1236            fprintf(outfile, "Starting character set: ");  
1237            for (i = 0; i < 256; i++)            if (start_bits == NULL)
1238                fprintf(outfile, "No starting byte set\n");
1239              else
1240              {              {
1241              if ((start_bits[i/8] & (1<<(i%8))) != 0)              int i;
1242                int c = 24;
1243                fprintf(outfile, "Starting byte set: ");
1244                for (i = 0; i < 256; i++)
1245                {                {
1246                if (c > 75)                if ((start_bits[i/8] & (1<<(i&7))) != 0)
1247                  {                  {
1248                  fprintf(outfile, "\n  ");                  if (c > 75)
1249                  c = 2;                    {
1250                  }                    fprintf(outfile, "\n  ");
1251                if (isprint(i) && i != ' ')                    c = 2;
1252                  {                    }
1253                  fprintf(outfile, "%c ", i);                  if (isprint(i) && i != ' ')
1254                  c += 2;                    {
1255                  }                    fprintf(outfile, "%c ", i);
1256                else                    c += 2;
1257                  {                    }
1258                  fprintf(outfile, "\\x%02x ", i);                  else
1259                  c += 5;                    {
1260                      fprintf(outfile, "\\x%02x ", i);
1261                      c += 5;
1262                      }
1263                  }                  }
1264                }                }
1265                fprintf(outfile, "\n");
1266              }              }
           fprintf(outfile, "\n");  
1267            }            }
1268          }          }
1269        }        }
1270      }  
1271        /* If the '>' option was present, we write out the regex to a file, and
1272        that is all. The first 8 bytes of the file are the regex length and then
1273        the study length, in big-endian order. */
1274    
1275        if (to_file != NULL)
1276          {
1277          FILE *f = fopen((char *)to_file, "wb");
1278          if (f == NULL)
1279            {
1280            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1281            }
1282          else
1283            {
1284            uschar sbuf[8];
1285            sbuf[0] = (true_size >> 24)  & 255;
1286            sbuf[1] = (true_size >> 16)  & 255;
1287            sbuf[2] = (true_size >>  8)  & 255;
1288            sbuf[3] = (true_size)  & 255;
1289    
1290            sbuf[4] = (true_study_size >> 24)  & 255;
1291            sbuf[5] = (true_study_size >> 16)  & 255;
1292            sbuf[6] = (true_study_size >>  8)  & 255;
1293            sbuf[7] = (true_study_size)  & 255;
1294    
1295            if (fwrite(sbuf, 1, 8, f) < 8 ||
1296                fwrite(re, 1, true_size, f) < true_size)
1297              {
1298              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1299              }
1300            else
1301              {
1302              fprintf(outfile, "Compiled regex written to %s\n", to_file);
1303              if (extra != NULL)
1304                {
1305                if (fwrite(extra->study_data, 1, true_study_size, f) <
1306                    true_study_size)
1307                  {
1308                  fprintf(outfile, "Write error on %s: %s\n", to_file,
1309                    strerror(errno));
1310                  }
1311                else fprintf(outfile, "Study data written to %s\n", to_file);
1312                }
1313              }
1314            fclose(f);
1315            }
1316    
1317          new_free(re);
1318          if (extra != NULL) new_free(extra);
1319          if (tables != NULL) new_free((void *)tables);
1320          continue;  /* With next regex */
1321          }
1322        }        /* End of non-POSIX compile */
1323    
1324    /* Read data lines and test them */    /* Read data lines and test them */
1325    
1326    for (;;)    for (;;)
1327      {      {
1328      unsigned char *q;      uschar *q;
1329      unsigned char *bptr = dbuffer;      uschar *bptr = dbuffer;
1330      int *use_offsets = offsets;      int *use_offsets = offsets;
1331      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1332      int callout_data = 0;      int callout_data = 0;
# Line 961  while (!done) Line 1339  while (!done)
1339      int gmatched = 0;      int gmatched = 0;
1340      int start_offset = 0;      int start_offset = 0;
1341      int g_notempty = 0;      int g_notempty = 0;
1342        int use_dfa = 0;
1343    
1344      options = 0;      options = 0;
1345    
# Line 1016  while (!done) Line 1395  while (!done)
1395    
1396          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
1397    
1398    #if !defined NOUTF8
1399          if (*p == '{')          if (*p == '{')
1400            {            {
1401            unsigned char *pt = p;            unsigned char *pt = p;
# Line 1034  while (!done) Line 1414  while (!done)
1414              }              }
1415            /* Not correct form; fall through */            /* Not correct form; fall through */
1416            }            }
1417    #endif
1418    
1419          /* Ordinary \x */          /* Ordinary \x */
1420    
# Line 1045  while (!done) Line 1426  while (!done)
1426            }            }
1427          break;          break;
1428    
1429          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1430          p--;          p--;
1431          continue;          continue;
1432    
1433            case '>':
1434            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1435            continue;
1436    
1437          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1438          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1439          continue;          continue;
# Line 1110  while (!done) Line 1495  while (!done)
1495            }            }
1496          continue;          continue;
1497    
1498    #if !defined NODFA
1499            case 'D':
1500    #if !defined NOPOSIX
1501            if (posix || do_posix)
1502              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1503            else
1504    #endif
1505              use_dfa = 1;
1506            continue;
1507    
1508            case 'F':
1509            options |= PCRE_DFA_SHORTEST;
1510            continue;
1511    #endif
1512    
1513          case 'G':          case 'G':
1514          if (isdigit(*p))          if (isdigit(*p))
1515            {            {
# Line 1152  while (!done) Line 1552  while (!done)
1552              {              {
1553              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1554                size_offsets_max * sizeof(int));                size_offsets_max * sizeof(int));
1555              return 1;              yield = 1;
1556                goto EXIT;
1557              }              }
1558            }            }
1559          use_size_offsets = n;          use_size_offsets = n;
1560          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1561          continue;          continue;
1562    
1563            case 'P':
1564            options |= PCRE_PARTIAL;
1565            continue;
1566    
1567    #if !defined NODFA
1568            case 'R':
1569            options |= PCRE_DFA_RESTART;
1570            continue;
1571    #endif
1572    
1573          case 'S':          case 'S':
1574          show_malloc = 1;          show_malloc = 1;
1575          continue;          continue;
# Line 1176  while (!done) Line 1587  while (!done)
1587      *q = 0;      *q = 0;
1588      len = q - dbuffer;      len = q - dbuffer;
1589    
1590        if ((all_use_dfa || use_dfa) && find_match_limit)
1591          {
1592          printf("**Match limit not relevant for DFA matching: ignored\n");
1593          find_match_limit = 0;
1594          }
1595    
1596      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1597      support timing or playing with the match limit or callout data. */      support timing or playing with the match limit or callout data. */
1598    
# Line 1197  while (!done) Line 1614  while (!done)
1614          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1615          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1616          }          }
1617          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1618                  != 0)
1619            {
1620            fprintf(outfile, "Matched with REG_NOSUB\n");
1621            }
1622        else        else
1623          {          {
1624          size_t i;          size_t i;
# Line 1233  while (!done) Line 1655  while (!done)
1655          register int i;          register int i;
1656          clock_t time_taken;          clock_t time_taken;
1657          clock_t start_time = clock();          clock_t start_time = clock();
1658    
1659    #if !defined NODFA
1660            if (all_use_dfa || use_dfa)
1661              {
1662              int workspace[1000];
1663              for (i = 0; i < LOOPREPEAT; i++)
1664                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1665                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1666                  sizeof(workspace)/sizeof(int));
1667              }
1668            else
1669    #endif
1670    
1671          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1672            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1673              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1674    
1675          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1676          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1677            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
# Line 1243  while (!done) Line 1679  while (!done)
1679          }          }
1680    
1681        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
1682        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
1683          for the recursion limit. */
1684    
1685        if (find_match_limit)        if (find_match_limit)
1686          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
1687          if (extra == NULL)          if (extra == NULL)
1688            {            {
1689            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1690            extra->flags = 0;            extra->flags = 0;
1691            }            }
         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;  
   
         for (;;)  
           {  
           extra->match_limit = mid;  
           count = pcre_exec(re, extra, (char *)bptr, len, start_offset,  
             options | g_notempty, use_offsets, use_size_offsets);  
           if (count == PCRE_ERROR_MATCHLIMIT)  
             {  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             min = mid;  
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
1692    
1693          extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;          count = check_match_limit(re, extra, bptr, len, start_offset,
1694              options|g_notempty, use_offsets, use_size_offsets,
1695              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
1696              PCRE_ERROR_MATCHLIMIT, "match()");
1697    
1698            count = check_match_limit(re, extra, bptr, len, start_offset,
1699              options|g_notempty, use_offsets, use_size_offsets,
1700              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
1701              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
1702          }          }
1703    
1704        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1305  while (!done) Line 1720  while (!done)
1720        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
1721        value of match_limit. */        value of match_limit. */
1722    
1723        else count = pcre_exec(re, extra, (char *)bptr, len,  #if !defined NODFA
1724          start_offset, options | g_notempty, use_offsets, use_size_offsets);        else if (all_use_dfa || use_dfa)
1725            {
1726            int workspace[1000];
1727            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1728              options | g_notempty, use_offsets, use_size_offsets, workspace,
1729              sizeof(workspace)/sizeof(int));
1730            if (count == 0)
1731              {
1732              fprintf(outfile, "Matched, but too many subsidiary matches\n");
1733              count = use_size_offsets/2;
1734              }
1735            }
1736    #endif
1737    
1738        if (count == 0)        else
1739          {          {
1740          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
1741          count = use_size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
1742            if (count == 0)
1743              {
1744              fprintf(outfile, "Matched, but too many substrings\n");
1745              count = use_size_offsets/3;
1746              }
1747          }          }
1748    
1749        /* Matched */        /* Matched */
# Line 1393  while (!done) Line 1825  while (!done)
1825            }            }
1826          }          }
1827    
1828          /* There was a partial match */
1829    
1830          else if (count == PCRE_ERROR_PARTIAL)
1831            {
1832            fprintf(outfile, "Partial match");
1833    #if !defined NODFA
1834            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
1835              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
1836                bptr + use_offsets[0]);
1837    #endif
1838            fprintf(outfile, "\n");
1839            break;  /* Out of the /g loop */
1840            }
1841    
1842        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
1843        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end.
1844        We want to advance the start offset, and continue. In the case of UTF-8        We want to advance the start offset, and continue. In the case of UTF-8
# Line 1467  while (!done) Line 1913  while (!done)
1913    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1914  #endif  #endif
1915    
1916    if (re != NULL) free(re);    if (re != NULL) new_free(re);
1917    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
1918    if (tables != NULL)    if (tables != NULL)
1919      {      {
1920      free((void *)tables);      new_free((void *)tables);
1921      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
1922      }      }
1923    }    }
1924    
1925  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
1926  return 0;  
1927    EXIT:
1928    
1929    if (infile != NULL && infile != stdin) fclose(infile);
1930    if (outfile != NULL && outfile != stdout) fclose(outfile);
1931    
1932    free(buffer);
1933    free(dbuffer);
1934    free(pbuffer);
1935    free(offsets);
1936    
1937    return yield;
1938  }  }
1939    
1940  /* End */  /* End of pcretest.c */

Legend:
Removed from v.73  
changed lines
  Added in v.87

  ViewVC Help
Powered by ViewVC 1.1.5