/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 63 by nigel, Sat Feb 24 21:40:03 2007 UTC revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather untidy in places. */  been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39  #include <ctype.h>  #include <ctype.h>
40  #include <stdio.h>  #include <stdio.h>
# Line 12  been extended and consequently is now ra Line 42  been extended and consequently is now ra
42  #include <stdlib.h>  #include <stdlib.h>
43  #include <time.h>  #include <time.h>
44  #include <locale.h>  #include <locale.h>
45    #include <errno.h>
 /* We need the internal info for displaying the results of pcre_study(). Also  
 for getting the opcodes for showing compiled code. */  
46    
47  #define PCRE_SPY        /* For Win32 build, import data, not export */  #define PCRE_SPY        /* For Win32 build, import data, not export */
48  #include "internal.h"  
49    /* We include pcre_internal.h because we need the internal info for displaying
50    the results of pcre_study() and we also need to know about the internal
51    macros, structures, and other internal data values; pcretest has "inside
52    information" compared to a program that strictly follows the PCRE API. */
53    
54    #include "pcre_internal.h"
55    
56    /* We need access to the data tables that PCRE uses. So as not to have to keep
57    two copies, we include the source file here, changing the names of the external
58    symbols to prevent clashes. */
59    
60    #define _pcre_utf8_table1      utf8_table1
61    #define _pcre_utf8_table1_size utf8_table1_size
62    #define _pcre_utf8_table2      utf8_table2
63    #define _pcre_utf8_table3      utf8_table3
64    #define _pcre_utf8_table4      utf8_table4
65    #define _pcre_utt              utt
66    #define _pcre_utt_size         utt_size
67    #define _pcre_OP_lengths       OP_lengths
68    
69    #include "pcre_tables.c"
70    
71    /* We also need the pcre_printint() function for printing out compiled
72    patterns. This function is in a separate file so that it can be included in
73    pcre_compile.c when that module is compiled with debugging enabled. */
74    
75    #include "pcre_printint.src"
76    
77    
78  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
79  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 27  Makefile. */ Line 83  Makefile. */
83  #include "pcreposix.h"  #include "pcreposix.h"
84  #endif  #endif
85    
86    /* It is also possible, for the benefit of the version imported into Exim, to
87    build pcretest without support for UTF8 (define NOUTF8), without the interface
88    to the DFA matcher (NODFA), and without the doublecheck of the old "info"
89    function (define NOINFOCHECK). */
90    
91    
92    /* Other parameters */
93    
94  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
95  #ifdef CLK_TCK  #ifdef CLK_TCK
96  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 35  Makefile. */ Line 99  Makefile. */
99  #endif  #endif
100  #endif  #endif
101    
102  #define LOOPREPEAT 50000  #define LOOPREPEAT 500000
103    
104    #define BUFFER_SIZE 30000
105    #define PBUFFER_SIZE BUFFER_SIZE
106    #define DBUFFER_SIZE BUFFER_SIZE
107    
108    
109    /* Static variables */
110    
111  static FILE *outfile;  static FILE *outfile;
112  static int log_store = 0;  static int log_store = 0;
113  static int callout_count;  static int callout_count;
# Line 45  static int callout_extra; Line 115  static int callout_extra;
115  static int callout_fail_count;  static int callout_fail_count;
116  static int callout_fail_id;  static int callout_fail_id;
117  static int first_callout;  static int first_callout;
118  static int utf8;  static int show_malloc;
119    static int use_utf8;
120  static size_t gotten_store;  static size_t gotten_store;
121    
122    static uschar *pbuffer = NULL;
   
 static int utf8_table1[] = {  
   0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  
   
 static int utf8_table2[] = {  
   0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  
   
 static int utf8_table3[] = {  
   0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  
   
   
   
 /*************************************************  
 *         Print compiled regex                   *  
 *************************************************/  
   
 /* The code for doing this is held in a separate file that is also included in  
 pcre.c when it is compiled with the debug switch. It defines a function called  
 print_internals(), which uses a table of opcode lengths defined by the macro  
 OP_LENGTHS, whose name must be OP_lengths. */  
   
 static uschar OP_lengths[] = { OP_LENGTHS };  
   
 #include "printint.c"  
123    
124    
125    
# Line 103  return(result); Line 150  return(result);
150    
151    
152    
 /*************************************************  
 *       Convert character value to UTF-8         *  
 *************************************************/  
   
 /* This function takes an integer value in the range 0 - 0x7fffffff  
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   buffer     pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
 */  
   
 static int  
 ord2utf8(int cvalue, unsigned char *buffer)  
 {  
 register int i, j;  
 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  
   if (cvalue <= utf8_table1[i]) break;  
 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  
 if (cvalue < 0) return -1;  
   
 buffer += i;  
 for (j = i; j > 0; j--)  
  {  
  *buffer-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *buffer = utf8_table2[i] | cvalue;  
 return i + 1;  
 }  
   
153    
154  /*************************************************  /*************************************************
155  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
# Line 155  Returns:   >  0 => the number of bytes c Line 166  Returns:   >  0 => the number of bytes c
166             -6 to 0 => malformed UTF-8 character at offset = (-return)             -6 to 0 => malformed UTF-8 character at offset = (-return)
167  */  */
168    
169  int  #if !defined NOUTF8
170    
171    static int
172  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *buffer, int *vptr)
173  {  {
174  int c = *buffer++;  int c = *buffer++;
# Line 186  for (j = 0; j < i; j++) Line 199  for (j = 0; j < i; j++)
199    
200  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
201    
202  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < utf8_table1_size; j++)
203    if (d <= utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
204  if (j != i) return -(i+1);  if (j != i) return -(i+1);
205    
# Line 196  if (j != i) return -(i+1); Line 209  if (j != i) return -(i+1);
209  return i+1;  return i+1;
210  }  }
211    
212    #endif
213    
214    
215    
216    /*************************************************
217    *       Convert character value to UTF-8         *
218    *************************************************/
219    
220    /* This function takes an integer value in the range 0 - 0x7fffffff
221    and encodes it as a UTF-8 character in 1 to 6 bytes.
222    
223    Arguments:
224      cvalue     the character value
225      buffer     pointer to buffer for result - at least 6 bytes long
226    
227    Returns:     number of bytes placed in the buffer
228    */
229    
230    static int
231    ord2utf8(int cvalue, uschar *buffer)
232    {
233    register int i, j;
234    for (i = 0; i < utf8_table1_size; i++)
235      if (cvalue <= utf8_table1[i]) break;
236    buffer += i;
237    for (j = i; j > 0; j--)
238     {
239     *buffer-- = 0x80 | (cvalue & 0x3f);
240     cvalue >>= 6;
241     }
242    *buffer = utf8_table2[i] | cvalue;
243    return i + 1;
244    }
245    
246    
247    
248  /*************************************************  /*************************************************
# Line 208  chars without printing. */ Line 255  chars without printing. */
255    
256  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(unsigned char *p, int length, FILE *f)
257  {  {
258  int c;  int c = 0;
259  int yield = 0;  int yield = 0;
260    
261  while (length-- > 0)  while (length-- > 0)
262    {    {
263    if (utf8)  #if !defined NOUTF8
264      if (use_utf8)
265      {      {
266      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
267    
# Line 235  while (length-- > 0) Line 283  while (length-- > 0)
283        continue;        continue;
284        }        }
285      }      }
286    #endif
287    
288     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
289    
# Line 266  data is not zero. */ Line 315  data is not zero. */
315  static int callout(pcre_callout_block *cb)  static int callout(pcre_callout_block *cb)
316  {  {
317  FILE *f = (first_callout | callout_extra)? outfile : NULL;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
318  int i, pre_start, post_start;  int i, pre_start, post_start, subject_length;
319    
320  if (callout_extra)  if (callout_extra)
321    {    {
   int i;  
322    fprintf(f, "Callout %d: last capture = %d\n",    fprintf(f, "Callout %d: last capture = %d\n",
323      cb->callout_number, cb->capture_last);      cb->callout_number, cb->capture_last);
324    
# Line 298  pre_start = pchars((unsigned char *)cb-> Line 346  pre_start = pchars((unsigned char *)cb->
346  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
347    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
348    
349    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
350    
351  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  (void)pchars((unsigned char *)(cb->subject + cb->current_position),
352    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
353    
354  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
355    
356  /* Always print appropriate indicators, with callout number if not already  /* Always print appropriate indicators, with callout number if not already
357  shown */  shown. For automatic callouts, show the pattern offset. */
358    
359  if (callout_extra) fprintf(outfile, "    ");  if (cb->callout_number == 255)
360    else fprintf(outfile, "%3d ", cb->callout_number);    {
361      fprintf(outfile, "%+3d ", cb->pattern_position);
362      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
363      }
364    else
365      {
366      if (callout_extra) fprintf(outfile, "    ");
367        else fprintf(outfile, "%3d ", cb->callout_number);
368      }
369    
370  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
371  fprintf(outfile, "^");  fprintf(outfile, "^");
# Line 318  if (post_start > 0) Line 376  if (post_start > 0)
376    fprintf(outfile, "^");    fprintf(outfile, "^");
377    }    }
378    
379  fprintf(outfile, "\n");  for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
380      fprintf(outfile, " ");
381    
382    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
383      pbuffer + cb->pattern_position);
384    
385    fprintf(outfile, "\n");
386  first_callout = 0;  first_callout = 0;
387    
388  if ((int)(cb->callout_data) != 0)  if (cb->callout_data != NULL)
389    {    {
390    fprintf(outfile, "Callout data = %d\n", (int)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
391    return (int)(cb->callout_data);    if (callout_data != 0)
392        {
393        fprintf(outfile, "Callout data = %d\n", callout_data);
394        return callout_data;
395        }
396    }    }
397    
398  return (cb->callout_number != callout_fail_id)? 0 :  return (cb->callout_number != callout_fail_id)? 0 :
# Line 334  return (cb->callout_number != callout_fa Line 401  return (cb->callout_number != callout_fa
401    
402    
403  /*************************************************  /*************************************************
404  *            Local malloc function               *  *            Local malloc functions              *
405  *************************************************/  *************************************************/
406    
407  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
# Line 342  compiled re. */ Line 409  compiled re. */
409    
410  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
411  {  {
412    void *block = malloc(size);
413  gotten_store = size;  gotten_store = size;
414  return malloc(size);  if (show_malloc)
415      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
416    return block;
417    }
418    
419    static void new_free(void *block)
420    {
421    if (show_malloc)
422      fprintf(outfile, "free             %p\n", block);
423    free(block);
424  }  }
425    
426    
427    /* For recursion malloc/free, to test stacking calls */
428    
429    static void *stack_malloc(size_t size)
430    {
431    void *block = malloc(size);
432    if (show_malloc)
433      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
434    return block;
435    }
436    
437    static void stack_free(void *block)
438    {
439    if (show_malloc)
440      fprintf(outfile, "stack_free       %p\n", block);
441    free(block);
442    }
443    
444    
445  /*************************************************  /*************************************************
446  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
# Line 364  if ((rc = pcre_fullinfo(re, study, optio Line 458  if ((rc = pcre_fullinfo(re, study, optio
458    
459    
460  /*************************************************  /*************************************************
461    *         Byte flipping function                 *
462    *************************************************/
463    
/* Reverse the byte order of a 2-byte or a 4-byte quantity.

All the masking and shifting is done on an unsigned copy of the value. Doing
it on a signed long would shift a set bit into the sign position when long is
32 bits wide, which is undefined behaviour in C.

Arguments:
  value     the quantity to flip; only the low-order 16 bits (when n is 2)
              or the low-order 32 bits (otherwise) are used
  n         2 to flip a 16-bit quantity; any other value flips 32 bits

Returns:    the flipped quantity, with all higher-order bits zero
*/

static long int
byteflip(long int value, int n)
{
unsigned long int v = (unsigned long int)value;

if (n == 2)
  return (long int)(((v & 0x00fful) << 8) | ((v & 0xff00ul) >> 8));

return (long int)(((v & 0x000000fful) << 24) |
                  ((v & 0x0000ff00ul) <<  8) |
                  ((v & 0x00ff0000ul) >>  8) |
                  ((v & 0xff000000ul) >> 24));
}
473    
474    
475    
476    
477    /*************************************************
478    *        Check match or recursion limit          *
479    *************************************************/
480    
481    static int
482    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
483      int start_offset, int options, int *use_offsets, int use_size_offsets,
484      int flag, unsigned long int *limit, int errnumber, const char *msg)
485    {
486    int count;
487    int min = 0;
488    int mid = 64;
489    int max = -1;
490    
491    extra->flags |= flag;
492    
493    for (;;)
494      {
495      *limit = mid;
496    
497      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
498        use_offsets, use_size_offsets);
499    
500      if (count == errnumber)
501        {
502        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
503        min = mid;
504        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
505        }
506    
507      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
508                             count == PCRE_ERROR_PARTIAL)
509        {
510        if (mid == min + 1)
511          {
512          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
513          break;
514          }
515        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
516        max = mid;
517        mid = (min + mid)/2;
518        }
519      else break;    /* Some other error */
520      }
521    
522    extra->flags &= ~flag;
523    return count;
524    }
525    
526    
527    
528    /*************************************************
529  *                Main Program                    *  *                Main Program                    *
530  *************************************************/  *************************************************/
531    
# Line 380  int op = 1; Line 542  int op = 1;
542  int timeit = 0;  int timeit = 0;
543  int showinfo = 0;  int showinfo = 0;
544  int showstore = 0;  int showstore = 0;
545    int quiet = 0;
546  int size_offsets = 45;  int size_offsets = 45;
547  int size_offsets_max;  int size_offsets_max;
548  int *offsets;  int *offsets = NULL;
549  #if !defined NOPOSIX  #if !defined NOPOSIX
550  int posix = 0;  int posix = 0;
551  #endif  #endif
552  int debug = 0;  int debug = 0;
553  int done = 0;  int done = 0;
554  unsigned char buffer[30000];  int all_use_dfa = 0;
555  unsigned char dbuffer[1024];  int yield = 0;
556    
557    unsigned char *buffer;
558    unsigned char *dbuffer;
559    
560  /* Static so that new_malloc can use it. */  /* Get buffers from malloc() so that Electric Fence will check their misuse
561    when I am debugging. */
562    
563    buffer = (unsigned char *)malloc(BUFFER_SIZE);
564    dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
565    pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
566    
567    /* The outfile variable is static so that new_malloc can use it. The _setmode()
568    stuff is some magic that I don't understand, but which apparently does good
569    things in Windows. It's related to line terminations.  */
570    
571    #if defined(_WIN32) || defined(WIN32)
572    _setmode( _fileno( stdout ), 0x8000 );
573    #endif  /* defined(_WIN32) || defined(WIN32) */
574    
575  outfile = stdout;  outfile = stdout;
576    
# Line 404  while (argc > 1 && argv[op][0] == '-') Line 583  while (argc > 1 && argv[op][0] == '-')
583    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
584      showstore = 1;      showstore = 1;
585    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
586      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
587    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
588    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
589    #if !defined NODFA
590      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
591    #endif
592    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
593        ((size_offsets = get_value(argv[op+1], &endptr)), *endptr == 0))        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
594            *endptr == 0))
595      {      {
596      op++;      op++;
597      argc--;      argc--;
# Line 422  while (argc > 1 && argv[op][0] == '-') Line 606  while (argc > 1 && argv[op][0] == '-')
606      printf("Compiled with\n");      printf("Compiled with\n");
607      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
608      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
609        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
610        printf("  %sUnicode properties support\n", rc? "" : "No ");
611      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
612      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
613      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
# Line 430  while (argc > 1 && argv[op][0] == '-') Line 616  while (argc > 1 && argv[op][0] == '-')
616      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
617      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
618      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %d\n", rc);
619        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
620        printf("  Default recursion depth limit = %d\n", rc);
621        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
622        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
623      exit(0);      exit(0);
624      }      }
625    else    else
# Line 437  while (argc > 1 && argv[op][0] == '-') Line 627  while (argc > 1 && argv[op][0] == '-')
627      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
628      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
629      printf("  -C     show PCRE compile-time options and exit\n");      printf("  -C     show PCRE compile-time options and exit\n");
630      printf("  -d     debug: show compiled code; implies -i\n"      printf("  -d     debug: show compiled code; implies -i\n");
631             "  -i     show information about compiled pattern\n"  #if !defined NODFA
632        printf("  -dfa   force DFA matching for all subjects\n");
633    #endif
634        printf("  -i     show information about compiled pattern\n"
635               "  -m     output memory used information\n"
636             "  -o <n> set size of offsets vector to <n>\n");             "  -o <n> set size of offsets vector to <n>\n");
637  #if !defined NOPOSIX  #if !defined NOPOSIX
638      printf("  -p     use POSIX interface\n");      printf("  -p     use POSIX interface\n");
639  #endif  #endif
640      printf("  -s     output store information\n"      printf("  -s     output store (memory) used information\n"
641             "  -t     time compilation and execution\n");             "  -t     time compilation and execution\n");
642      return 1;      yield = 1;
643        goto EXIT;
644      }      }
645    op++;    op++;
646    argc--;    argc--;
# Line 454  while (argc > 1 && argv[op][0] == '-') Line 649  while (argc > 1 && argv[op][0] == '-')
649  /* Get the store for the offsets vector, and remember what it was */  /* Get the store for the offsets vector, and remember what it was */
650    
651  size_offsets_max = size_offsets;  size_offsets_max = size_offsets;
652  offsets = malloc(size_offsets_max * sizeof(int));  offsets = (int *)malloc(size_offsets_max * sizeof(int));
653  if (offsets == NULL)  if (offsets == NULL)
654    {    {
655    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
656      size_offsets_max * sizeof(int));      size_offsets_max * sizeof(int));
657    return 1;    yield = 1;
658      goto EXIT;
659    }    }
660    
661  /* Sort out the input and output files */  /* Sort out the input and output files */
662    
663  if (argc > 1)  if (argc > 1)
664    {    {
665    infile = fopen(argv[op], "r");    infile = fopen(argv[op], "rb");
666    if (infile == NULL)    if (infile == NULL)
667      {      {
668      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
669      return 1;      yield = 1;
670        goto EXIT;
671      }      }
672    }    }
673    
674  if (argc > 2)  if (argc > 2)
675    {    {
676    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], "wb");
677    if (outfile == NULL)    if (outfile == NULL)
678      {      {
679      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
680      return 1;      yield = 1;
681        goto EXIT;
682      }      }
683    }    }
684    
685  /* Set alternative malloc function */  /* Set alternative malloc function */
686    
687  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
688    pcre_free = new_free;
689    pcre_stack_malloc = stack_malloc;
690    pcre_stack_free = stack_free;
691    
692  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
693    
694  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
695    
696  /* Main loop */  /* Main loop */
697    
# Line 506  while (!done) Line 707  while (!done)
707    
708    const char *error;    const char *error;
709    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
710      unsigned char *to_file = NULL;
711    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
712      unsigned long int true_size, true_study_size = 0;
713      size_t size, regex_gotten_store;
714    int do_study = 0;    int do_study = 0;
715    int do_debug = debug;    int do_debug = debug;
716    int do_G = 0;    int do_G = 0;
717    int do_g = 0;    int do_g = 0;
718    int do_showinfo = showinfo;    int do_showinfo = showinfo;
719    int do_showrest = 0;    int do_showrest = 0;
720      int do_flip = 0;
721    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
722    
723    utf8 = 0;    use_utf8 = 0;
724    
725    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
726    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
727    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
728    fflush(outfile);    fflush(outfile);
729    
# Line 526  while (!done) Line 731  while (!done)
731    while (isspace(*p)) p++;    while (isspace(*p)) p++;
732    if (*p == 0) continue;    if (*p == 0) continue;
733    
734    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
735    complete, read more. */  
736      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
737        {
738        unsigned long int magic;
739        uschar sbuf[8];
740        FILE *f;
741    
742        p++;
743        pp = p + (int)strlen((char *)p);
744        while (isspace(pp[-1])) pp--;
745        *pp = 0;
746    
747        f = fopen((char *)p, "rb");
748        if (f == NULL)
749          {
750          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
751          continue;
752          }
753    
754        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
755    
756        true_size =
757          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
758        true_study_size =
759          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
760    
761        re = (real_pcre *)new_malloc(true_size);
762        regex_gotten_store = gotten_store;
763    
764        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
765    
766        magic = ((real_pcre *)re)->magic_number;
767        if (magic != MAGIC_NUMBER)
768          {
769          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
770            {
771            do_flip = 1;
772            }
773          else
774            {
775            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
776            fclose(f);
777            continue;
778            }
779          }
780    
781        fprintf(outfile, "Compiled regex%s loaded from %s\n",
782          do_flip? " (byte-inverted)" : "", p);
783    
784        /* Need to know if UTF-8 for printing data strings */
785    
786        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
787        use_utf8 = (options & PCRE_UTF8) != 0;
788    
789        /* Now see if there is any following study data */
790    
791        if (true_study_size != 0)
792          {
793          pcre_study_data *psd;
794    
795          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
796          extra->flags = PCRE_EXTRA_STUDY_DATA;
797    
798          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
799          extra->study_data = psd;
800    
801          if (fread(psd, 1, true_study_size, f) != true_study_size)
802            {
803            FAIL_READ:
804            fprintf(outfile, "Failed to read data from %s\n", p);
805            if (extra != NULL) new_free(extra);
806            if (re != NULL) new_free(re);
807            fclose(f);
808            continue;
809            }
810          fprintf(outfile, "Study data loaded from %s\n", p);
811          do_study = 1;     /* To get the data output if requested */
812          }
813        else fprintf(outfile, "No study data\n");
814    
815        fclose(f);
816        goto SHOW_INFO;
817        }
818    
819      /* In-line pattern (the usual case). Get the delimiter and seek the end of
821      the pattern; if it isn't complete, read more. */
821    
822    delimiter = *p++;    delimiter = *p++;
823    
# Line 549  while (!done) Line 839  while (!done)
839        }        }
840      if (*pp != 0) break;      if (*pp != 0) break;
841    
842      len = sizeof(buffer) - (pp - buffer);      len = BUFFER_SIZE - (pp - buffer);
843      if (len < 256)      if (len < 256)
844        {        {
845        fprintf(outfile, "** Expression too long - missing delimiter?\n");        fprintf(outfile, "** Expression too long - missing delimiter?\n");
# Line 572  while (!done) Line 862  while (!done)
862    
863    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
864    
865    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
866      for callouts. */
867    
868    *pp++ = 0;    *pp++ = 0;
869      strcpy((char *)pbuffer, (char *)p);
870    
871    /* Look for options after final delimiter */    /* Look for options after final delimiter */
872    
# Line 586  while (!done) Line 878  while (!done)
878      {      {
879      switch (*pp++)      switch (*pp++)
880        {        {
881          case 'f': options |= PCRE_FIRSTLINE; break;
882        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
883        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
884        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 594  while (!done) Line 887  while (!done)
887    
888        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
889        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
890          case 'C': options |= PCRE_AUTO_CALLOUT; break;
891        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
892        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
893          case 'F': do_flip = 1; break;
894        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
895        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
896        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
# Line 608  while (!done) Line 903  while (!done)
903        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
904        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
905        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
906        case '8': options |= PCRE_UTF8; utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
907          case '?': options |= PCRE_NO_UTF8_CHECK; break;
908    
909        case 'L':        case 'L':
910        ppp = pp;        ppp = pp;
911        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows */
912          while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
913        *ppp = 0;        *ppp = 0;
914        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
915          {          {
# Line 623  while (!done) Line 920  while (!done)
920        pp = ppp;        pp = ppp;
921        break;        break;
922    
923        case '\n': case ' ': break;        case '>':
924          to_file = pp;
925          while (*pp != 0) pp++;
926          while (isspace(pp[-1])) pp--;
927          *pp = 0;
928          break;
929    
930          case '\r':                      /* So that it works in Windows */
931          case '\n':
932          case ' ':
933          break;
934    
935        default:        default:
936        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
937        goto SKIP_DATA;        goto SKIP_DATA;
# Line 639  while (!done) Line 947  while (!done)
947      {      {
948      int rc;      int rc;
949      int cflags = 0;      int cflags = 0;
950    
951      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
952      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
953        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
954        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
955        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
956    
957      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
958    
959      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 648  while (!done) Line 961  while (!done)
961    
962      if (rc != 0)      if (rc != 0)
963        {        {
964        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
965        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
966        goto SKIP_DATA;        goto SKIP_DATA;
967        }        }
# Line 689  while (!done) Line 1002  while (!done)
1002          {          {
1003          for (;;)          for (;;)
1004            {            {
1005            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1006              {              {
1007              done = 1;              done = 1;
1008              goto CONTINUE;              goto CONTINUE;
# Line 713  while (!done) Line 1026  while (!done)
1026                sizeof(real_pcre) -                sizeof(real_pcre) -
1027                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1028    
1029        /* Extract the size for possible writing before possibly flipping it,
1030        and remember the store that was got. */
1031    
1032        true_size = ((real_pcre *)re)->size;
1033        regex_gotten_store = gotten_store;
1034    
1035        /* If /S was present, study the regexp to generate additional info to
1036        help with the matching. */
1037    
1038        if (do_study)
1039          {
1040          if (timeit)
1041            {
1042            register int i;
1043            clock_t time_taken;
1044            clock_t start_time = clock();
1045            for (i = 0; i < LOOPREPEAT; i++)
1046              extra = pcre_study(re, study_options, &error);
1047            time_taken = clock() - start_time;
1048            if (extra != NULL) free(extra);
1049            fprintf(outfile, "  Study time %.3f milliseconds\n",
1050              (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1051                (double)CLOCKS_PER_SEC);
1052            }
1053          extra = pcre_study(re, study_options, &error);
1054          if (error != NULL)
1055            fprintf(outfile, "Failed to study: %s\n", error);
1056          else if (extra != NULL)
1057            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1058          }
1059    
1060        /* If the 'F' option was present, we flip the bytes of all the integer
1061        fields in the regex data block and the study block. This is to make it
1062        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1063        compiled on a different architecture. */
1064    
1065        if (do_flip)
1066          {
1067          real_pcre *rre = (real_pcre *)re;
1068          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1069          rre->size = byteflip(rre->size, sizeof(rre->size));
1070          rre->options = byteflip(rre->options, sizeof(rre->options));
1071          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1072          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1073          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1074          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1075          rre->name_table_offset = byteflip(rre->name_table_offset,
1076            sizeof(rre->name_table_offset));
1077          rre->name_entry_size = byteflip(rre->name_entry_size,
1078            sizeof(rre->name_entry_size));
1079          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1080    
1081          if (extra != NULL)
1082            {
1083            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1084            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1085            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1086            }
1087          }
1088    
1089        /* Extract information from the compiled data if required */
1090    
1091        SHOW_INFO:
1092    
1093      if (do_showinfo)      if (do_showinfo)
1094        {        {
1095        unsigned long int get_options;        unsigned long int get_options, all_options;
1096    #if !defined NOINFOCHECK
1097        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1098    #endif
1099        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char;
1100        int nameentrysize, namecount;        int nameentrysize, namecount;
1101        const uschar *nametable;        const uschar *nametable;
       size_t size;  
1102    
1103        if (do_debug)        if (do_debug)
1104          {          {
1105          fprintf(outfile, "------------------------------------------------------------------\n");          fprintf(outfile, "------------------------------------------------------------------\n");
1106          print_internals(re, outfile);          pcre_printint(re, outfile);
1107          }          }
1108    
1109        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
# Line 736  while (!done) Line 1114  while (!done)
1114        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1115        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1116        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1117        new_info(re, NULL, PCRE_INFO_NAMETABLE, &nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1118    
1119    #if !defined NOINFOCHECK
1120        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1121        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1122          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 755  while (!done) Line 1134  while (!done)
1134            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1135              get_options, old_options);              get_options, old_options);
1136          }          }
1137    #endif
1138    
1139        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1140          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1141          size, gotten_store);          (int)size, (int)regex_gotten_store);
1142    
1143        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1144        if (backrefmax > 0)        if (backrefmax > 0)
# Line 776  while (!done) Line 1156  while (!done)
1156            }            }
1157          }          }
1158    
1159          /* The NOPARTIAL bit is a private bit in the options, so we have
1160          to fish it out via our back door */
1161    
1162          all_options = ((real_pcre *)re)->options;
1163          if (do_flip)
1164            {
1165            all_options = byteflip(all_options, sizeof(all_options));
1166            }
1167    
1168          if ((all_options & PCRE_NOPARTIAL) != 0)
1169            fprintf(outfile, "Partial matching not supported\n");
1170    
1171        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1172          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s\n",
1173            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1174            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1175            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1176            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1177              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1178            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1179            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1180            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1181            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1182            ((get_options & PCRE_UTF8) != 0)? " utf8" : "");            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1183              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1184              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1185    
1186        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1187          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Case state changes\n");
# Line 802  while (!done) Line 1197  while (!done)
1197        else        else
1198          {          {
1199          int ch = first_char & 255;          int ch = first_char & 255;
1200          char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1201            "" : " (caseless)";            "" : " (caseless)";
1202          if (isprint(ch))          if (isprint(ch))
1203            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
# Line 817  while (!done) Line 1212  while (!done)
1212        else        else
1213          {          {
1214          int ch = need_char & 255;          int ch = need_char & 255;
1215          char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1216            "" : " (caseless)";            "" : " (caseless)";
1217          if (isprint(ch))          if (isprint(ch))
1218            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1219          else          else
1220            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1221          }          }
       }  
1222    
1223      /* If /S was present, study the regexp to generate additional info to        /* Don't output study size; at present it is in any case a fixed
1224      help with the matching. */        value, but it varies, depending on the computer architecture, and
1225          so messes up the test suite. (And with the /F option, it might be
1226          flipped.) */
1227    
1228      if (do_study)        if (do_study)
       {  
       if (timeit)  
1229          {          {
1230          register int i;          if (extra == NULL)
1231          clock_t time_taken;            fprintf(outfile, "Study returned NULL\n");
1232          clock_t start_time = clock();          else
1233          for (i = 0; i < LOOPREPEAT; i++)            {
1234            extra = pcre_study(re, study_options, &error);            uschar *start_bits = NULL;
1235          time_taken = clock() - start_time;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1236          if (extra != NULL) free(extra);  
1237          fprintf(outfile, "  Study time %.3f milliseconds\n",            if (start_bits == NULL)
1238            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /              fprintf(outfile, "No starting byte set\n");
1239              (double)CLOCKS_PER_SEC);            else
1240                {
1241                int i;
1242                int c = 24;
1243                fprintf(outfile, "Starting byte set: ");
1244                for (i = 0; i < 256; i++)
1245                  {
1246                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1247                    {
1248                    if (c > 75)
1249                      {
1250                      fprintf(outfile, "\n  ");
1251                      c = 2;
1252                      }
1253                    if (isprint(i) && i != ' ')
1254                      {
1255                      fprintf(outfile, "%c ", i);
1256                      c += 2;
1257                      }
1258                    else
1259                      {
1260                      fprintf(outfile, "\\x%02x ", i);
1261                      c += 5;
1262                      }
1263                    }
1264                  }
1265                fprintf(outfile, "\n");
1266                }
1267              }
1268          }          }
1269          }
1270    
1271        extra = pcre_study(re, study_options, &error);      /* If the '>' option was present, we write out the regex to a file, and
1272        if (error != NULL)      that is all. The first 8 bytes of the file are the regex length and then
1273          fprintf(outfile, "Failed to study: %s\n", error);      the study length, in big-endian order. */
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
1274    
1275        else if (do_showinfo)      if (to_file != NULL)
1276          {
1277          FILE *f = fopen((char *)to_file, "wb");
1278          if (f == NULL)
1279          {          {
1280          size_t size;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1281          uschar *start_bits = NULL;          }
1282          new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);        else
1283          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          {
1284          fprintf(outfile, "Study size = %d\n", size);          uschar sbuf[8];
1285          if (start_bits == NULL)          sbuf[0] = (true_size >> 24)  & 255;
1286            fprintf(outfile, "No starting character set\n");          sbuf[1] = (true_size >> 16)  & 255;
1287            sbuf[2] = (true_size >>  8)  & 255;
1288            sbuf[3] = (true_size)  & 255;
1289    
1290            sbuf[4] = (true_study_size >> 24)  & 255;
1291            sbuf[5] = (true_study_size >> 16)  & 255;
1292            sbuf[6] = (true_study_size >>  8)  & 255;
1293            sbuf[7] = (true_study_size)  & 255;
1294    
1295            if (fwrite(sbuf, 1, 8, f) < 8 ||
1296                fwrite(re, 1, true_size, f) < true_size)
1297              {
1298              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1299              }
1300          else          else
1301            {            {
1302            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1303            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1304              {              {
1305              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1306                    true_study_size)
1307                {                {
1308                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1309                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1310                }                }
1311                else fprintf(outfile, "Study data written to %s\n", to_file);
1312              }              }
           fprintf(outfile, "\n");  
1313            }            }
1314            fclose(f);
1315          }          }
1316    
1317          new_free(re);
1318          if (extra != NULL) new_free(extra);
1319          if (tables != NULL) new_free((void *)tables);
1320          continue;  /* With next regex */
1321        }        }
1322      }      }        /* End of non-POSIX compile */
1323    
1324    /* Read data lines and test them */    /* Read data lines and test them */
1325    
1326    for (;;)    for (;;)
1327      {      {
1328      unsigned char *q;      uschar *q;
1329      unsigned char *bptr = dbuffer;      uschar *bptr = dbuffer;
1330      int *use_offsets = offsets;      int *use_offsets = offsets;
1331      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1332      int callout_data = 0;      int callout_data = 0;
# Line 910  while (!done) Line 1339  while (!done)
1339      int gmatched = 0;      int gmatched = 0;
1340      int start_offset = 0;      int start_offset = 0;
1341      int g_notempty = 0;      int g_notempty = 0;
1342        int use_dfa = 0;
1343    
1344      options = 0;      options = 0;
1345    
# Line 919  while (!done) Line 1349  while (!done)
1349      callout_count = 0;      callout_count = 0;
1350      callout_fail_count = 999999;      callout_fail_count = 999999;
1351      callout_fail_id = -1;      callout_fail_id = -1;
1352        show_malloc = 0;
1353    
1354      if (infile == stdin) printf("data> ");      if (infile == stdin) printf("data> ");
1355      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1356        {        {
1357        done = 1;        done = 1;
1358        goto CONTINUE;        goto CONTINUE;
# Line 964  while (!done) Line 1395  while (!done)
1395    
1396          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
1397    
1398    #if !defined NOUTF8
1399          if (*p == '{')          if (*p == '{')
1400            {            {
1401            unsigned char *pt = p;            unsigned char *pt = p;
# Line 972  while (!done) Line 1404  while (!done)
1404              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1405            if (*pt == '}')            if (*pt == '}')
1406              {              {
1407              unsigned char buffer[8];              unsigned char buff8[8];
1408              int ii, utn;              int ii, utn;
1409              utn = ord2utf8(c, buffer);              utn = ord2utf8(c, buff8);
1410              for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1411              c = buffer[ii];   /* Last byte */              c = buff8[ii];   /* Last byte */
1412              p = pt + 1;              p = pt + 1;
1413              break;              break;
1414              }              }
1415            /* Not correct form; fall through */            /* Not correct form; fall through */
1416            }            }
1417    #endif
1418    
1419          /* Ordinary \x */          /* Ordinary \x */
1420    
# Line 993  while (!done) Line 1426  while (!done)
1426            }            }
1427          break;          break;
1428    
1429          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1430          p--;          p--;
1431          continue;          continue;
1432    
1433            case '>':
1434            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1435            continue;
1436    
1437          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1438          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1439          continue;          continue;
# Line 1014  while (!done) Line 1451  while (!done)
1451          else if (isalnum(*p))          else if (isalnum(*p))
1452            {            {
1453            uschar name[256];            uschar name[256];
1454            uschar *pp = name;            uschar *npp = name;
1455            while (isalnum(*p)) *pp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1456            *pp = 0;            *npp = 0;
1457            n = pcre_get_stringnumber(re, name);            n = pcre_get_stringnumber(re, (char *)name);
1458            if (n < 0)            if (n < 0)
1459              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1460            else copystrings |= 1 << n;            else copystrings |= 1 << n;
# Line 1058  while (!done) Line 1495  while (!done)
1495            }            }
1496          continue;          continue;
1497    
1498    #if !defined NODFA
1499            case 'D':
1500    #if !defined NOPOSIX
1501            if (posix || do_posix)
1502              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1503            else
1504    #endif
1505              use_dfa = 1;
1506            continue;
1507    
1508            case 'F':
1509            options |= PCRE_DFA_SHORTEST;
1510            continue;
1511    #endif
1512    
1513          case 'G':          case 'G':
1514          if (isdigit(*p))          if (isdigit(*p))
1515            {            {
# Line 1067  while (!done) Line 1519  while (!done)
1519          else if (isalnum(*p))          else if (isalnum(*p))
1520            {            {
1521            uschar name[256];            uschar name[256];
1522            uschar *pp = name;            uschar *npp = name;
1523            while (isalnum(*p)) *pp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1524            *pp = 0;            *npp = 0;
1525            n = pcre_get_stringnumber(re, name);            n = pcre_get_stringnumber(re, (char *)name);
1526            if (n < 0)            if (n < 0)
1527              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1528            else getstrings |= 1 << n;            else getstrings |= 1 << n;
# Line 1095  while (!done) Line 1547  while (!done)
1547            {            {
1548            size_offsets_max = n;            size_offsets_max = n;
1549            free(offsets);            free(offsets);
1550            use_offsets = offsets = malloc(size_offsets_max * sizeof(int));            use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1551            if (offsets == NULL)            if (offsets == NULL)
1552              {              {
1553              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1554                size_offsets_max * sizeof(int));                size_offsets_max * sizeof(int));
1555              return 1;              yield = 1;
1556                goto EXIT;
1557              }              }
1558            }            }
1559          use_size_offsets = n;          use_size_offsets = n;
1560          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1561          continue;          continue;
1562    
1563            case 'P':
1564            options |= PCRE_PARTIAL;
1565            continue;
1566    
1567    #if !defined NODFA
1568            case 'R':
1569            options |= PCRE_DFA_RESTART;
1570            continue;
1571    #endif
1572    
1573            case 'S':
1574            show_malloc = 1;
1575            continue;
1576    
1577          case 'Z':          case 'Z':
1578          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1579          continue;          continue;
1580    
1581            case '?':
1582            options |= PCRE_NO_UTF8_CHECK;
1583            continue;
1584          }          }
1585        *q++ = c;        *q++ = c;
1586        }        }
1587      *q = 0;      *q = 0;
1588      len = q - dbuffer;      len = q - dbuffer;
1589    
1590        if ((all_use_dfa || use_dfa) && find_match_limit)
1591          {
1592          printf("**Match limit not relevant for DFA matching: ignored\n");
1593          find_match_limit = 0;
1594          }
1595    
1596      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1597      support timing or playing with the match limit or callout data. */      support timing or playing with the match limit or callout data. */
1598    
# Line 1126  while (!done) Line 1603  while (!done)
1603        int eflags = 0;        int eflags = 0;
1604        regmatch_t *pmatch = NULL;        regmatch_t *pmatch = NULL;
1605        if (use_size_offsets > 0)        if (use_size_offsets > 0)
1606          pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1607        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1608        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1609    
# Line 1134  while (!done) Line 1611  while (!done)
1611    
1612        if (rc != 0)        if (rc != 0)
1613          {          {
1614          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1615          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1616          }          }
1617          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1618                  != 0)
1619            {
1620            fprintf(outfile, "Matched with REG_NOSUB\n");
1621            }
1622        else        else
1623          {          {
1624          size_t i;          size_t i;
# Line 1173  while (!done) Line 1655  while (!done)
1655          register int i;          register int i;
1656          clock_t time_taken;          clock_t time_taken;
1657          clock_t start_time = clock();          clock_t start_time = clock();
1658    
1659    #if !defined NODFA
1660            if (all_use_dfa || use_dfa)
1661              {
1662              int workspace[1000];
1663              for (i = 0; i < LOOPREPEAT; i++)
1664                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1665                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1666                  sizeof(workspace)/sizeof(int));
1667              }
1668            else
1669    #endif
1670    
1671          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1672            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1673              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1674    
1675          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1676          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1677            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
# Line 1183  while (!done) Line 1679  while (!done)
1679          }          }
1680    
1681        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
1682        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
1683          for the recursion limit. */
1684    
1685        if (find_match_limit)        if (find_match_limit)
1686          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
1687          if (extra == NULL)          if (extra == NULL)
1688            {            {
1689            extra = malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1690            extra->flags = 0;            extra->flags = 0;
1691            }            }
         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;  
1692    
1693          for (;;)          count = check_match_limit(re, extra, bptr, len, start_offset,
1694            {            options|g_notempty, use_offsets, use_size_offsets,
1695            extra->match_limit = mid;            PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
1696            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,            PCRE_ERROR_MATCHLIMIT, "match()");
1697              options | g_notempty, use_offsets, use_size_offsets);  
1698            if (count == PCRE_ERROR_MATCHLIMIT)          count = check_match_limit(re, extra, bptr, len, start_offset,
1699              {            options|g_notempty, use_offsets, use_size_offsets,
1700              /* fprintf(outfile, "Testing match limit = %d\n", mid); */            PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
1701              min = mid;            PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
   
         extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;  
1702          }          }
1703    
1704        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1232  while (!done) Line 1707  while (!done)
1707          {          {
1708          if (extra == NULL)          if (extra == NULL)
1709            {            {
1710            extra = malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1711            extra->flags = 0;            extra->flags = 0;
1712            }            }
1713          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1714          extra->callout_data = (void *)callout_data;          extra->callout_data = &callout_data;
1715          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1716            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
1717          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
# Line 1245  while (!done) Line 1720  while (!done)
1720        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
1721        value of match_limit. */        value of match_limit. */
1722    
1723        else count = pcre_exec(re, extra, (char *)bptr, len,  #if !defined NODFA
1724          start_offset, options | g_notempty, use_offsets, use_size_offsets);        else if (all_use_dfa || use_dfa)
1725            {
1726            int workspace[1000];
1727            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1728              options | g_notempty, use_offsets, use_size_offsets, workspace,
1729              sizeof(workspace)/sizeof(int));
1730            if (count == 0)
1731              {
1732              fprintf(outfile, "Matched, but too many subsidiary matches\n");
1733              count = use_size_offsets/2;
1734              }
1735            }
1736    #endif
1737    
1738        if (count == 0)        else
1739          {          {
1740          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
1741          count = use_size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
1742            if (count == 0)
1743              {
1744              fprintf(outfile, "Matched, but too many substrings\n");
1745              count = use_size_offsets/3;
1746              }
1747          }          }
1748    
1749        /* Matched */        /* Matched */
# Line 1333  while (!done) Line 1825  while (!done)
1825            }            }
1826          }          }
1827    
1828          /* There was a partial match */
1829    
1830          else if (count == PCRE_ERROR_PARTIAL)
1831            {
1832            fprintf(outfile, "Partial match");
1833    #if !defined NODFA
1834            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
1835              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
1836                bptr + use_offsets[0]);
1837    #endif
1838            fprintf(outfile, "\n");
1839            break;  /* Out of the /g loop */
1840            }
1841    
1842        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
1843        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end.
1844        We want to advance the start offset, and continue. Fudge the offset        We want to advance the start offset, and continue. In the case of UTF-8
1845        values to achieve this. We won't be at the end of the string - that        matching, the advance must be one character, not one byte. Fudge the
1846        was checked before setting g_notempty. */        offset values to achieve this. We won't be at the end of the string -
1847          that was checked before setting g_notempty. */
1848    
1849        else        else
1850          {          {
1851          if (g_notempty != 0)          if (g_notempty != 0)
1852            {            {
1853              int onechar = 1;
1854            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
1855            use_offsets[1] = start_offset + 1;            if (use_utf8)
1856                {
1857                while (start_offset + onechar < len)
1858                  {
1859                  int tb = bptr[start_offset+onechar];
1860                  if (tb <= 127) break;
1861                  tb &= 0xc0;
1862                  if (tb != 0 && tb != 0xc0) onechar++;
1863                  }
1864                }
1865              use_offsets[1] = start_offset + onechar;
1866            }            }
1867          else          else
1868            {            {
1869            if (gmatched == 0)   /* Error if no previous matches */            if (count == PCRE_ERROR_NOMATCH)
1870              {              {
1871              if (count == -1) fprintf(outfile, "No match\n");              if (gmatched == 0) fprintf(outfile, "No match\n");
               else fprintf(outfile, "Error %d\n", count);  
1872              }              }
1873              else fprintf(outfile, "Error %d\n", count);
1874            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
1875            }            }
1876          }          }
# Line 1395  while (!done) Line 1913  while (!done)
1913    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1914  #endif  #endif
1915    
1916    if (re != NULL) free(re);    if (re != NULL) new_free(re);
1917    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
1918    if (tables != NULL)    if (tables != NULL)
1919      {      {
1920      free((void *)tables);      new_free((void *)tables);
1921      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
1922      }      }
1923    }    }
1924    
1925  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
1926  return 0;  
1927    EXIT:
1928    
1929    if (infile != NULL && infile != stdin) fclose(infile);
1930    if (outfile != NULL && outfile != stdout) fclose(outfile);
1931    
1932    free(buffer);
1933    free(dbuffer);
1934    free(pbuffer);
1935    free(offsets);
1936    
1937    return yield;
1938  }  }
1939    
1940  /* End */  /* End of pcretest.c */

Legend:
Removed from v.63  
changed lines
  Added in v.87

  ViewVC Help
Powered by ViewVC 1.1.5