/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 65 by nigel, Sat Feb 24 21:40:08 2007 UTC revision 85 by nigel, Sat Feb 24 21:41:13 2007 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather untidy in places. */  been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39  #include <ctype.h>  #include <ctype.h>
40  #include <stdio.h>  #include <stdio.h>
# Line 12  been extended and consequently is now ra Line 42  been extended and consequently is now ra
42  #include <stdlib.h>  #include <stdlib.h>
43  #include <time.h>  #include <time.h>
44  #include <locale.h>  #include <locale.h>
45    #include <errno.h>
 /* We need the internal info for displaying the results of pcre_study(). Also  
 for getting the opcodes for showing compiled code. */  
46    
47  #define PCRE_SPY        /* For Win32 build, import data, not export */  #define PCRE_SPY        /* For Win32 build, import data, not export */
48  #include "internal.h"  
49    /* We include pcre_internal.h because we need the internal info for displaying
50    the results of pcre_study() and we also need to know about the internal
51    macros, structures, and other internal data values; pcretest has "inside
52    information" compared to a program that strictly follows the PCRE API. */
53    
54    #include "pcre_internal.h"
55    
56    /* We need access to the data tables that PCRE uses. So as not to have to keep
57    two copies, we include the source file here, changing the names of the external
58    symbols to prevent clashes. */
59    
60    #define _pcre_utf8_table1      utf8_table1
61    #define _pcre_utf8_table1_size utf8_table1_size
62    #define _pcre_utf8_table2      utf8_table2
63    #define _pcre_utf8_table3      utf8_table3
64    #define _pcre_utf8_table4      utf8_table4
65    #define _pcre_utt              utt
66    #define _pcre_utt_size         utt_size
67    #define _pcre_OP_lengths       OP_lengths
68    
69    #include "pcre_tables.c"
70    
71    /* We also need the pcre_printint() function for printing out compiled
72    patterns. This function is in a separate file so that it can be included in
73    pcre_compile.c when that module is compiled with debugging enabled. */
74    
75    #include "pcre_printint.src"
76    
77    
78  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
79  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 27  Makefile. */ Line 83  Makefile. */
83  #include "pcreposix.h"  #include "pcreposix.h"
84  #endif  #endif
85    
86    /* It is also possible, for the benefit of the version imported into Exim, to
87    build pcretest without support for UTF8 (define NOUTF8), without the interface
88    to the DFA matcher (NODFA), and without the doublecheck of the old "info"
89    function (define NOINFOCHECK). */
90    
91    
92    /* Other parameters */
93    
94  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
95  #ifdef CLK_TCK  #ifdef CLK_TCK
96  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 35  Makefile. */ Line 99  Makefile. */
99  #endif  #endif
100  #endif  #endif
101    
102  #define LOOPREPEAT 50000  #define LOOPREPEAT 500000
103    
104    #define BUFFER_SIZE 30000
105    #define PBUFFER_SIZE BUFFER_SIZE
106    #define DBUFFER_SIZE BUFFER_SIZE
107    
108    
109    /* Static variables */
110    
111  static FILE *outfile;  static FILE *outfile;
112  static int log_store = 0;  static int log_store = 0;
# Line 45  static int callout_extra; Line 115  static int callout_extra;
115  static int callout_fail_count;  static int callout_fail_count;
116  static int callout_fail_id;  static int callout_fail_id;
117  static int first_callout;  static int first_callout;
118  static int utf8;  static int show_malloc;
119    static int use_utf8;
120  static size_t gotten_store;  static size_t gotten_store;
121    
122    static uschar *pbuffer = NULL;
   
 static int utf8_table1[] = {  
   0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  
   
 static int utf8_table2[] = {  
   0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  
   
 static int utf8_table3[] = {  
   0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  
   
   
   
 /*************************************************  
 *         Print compiled regex                   *  
 *************************************************/  
   
 /* The code for doing this is held in a separate file that is also included in  
 pcre.c when it is compiled with the debug switch. It defines a function called  
 print_internals(), which uses a table of opcode lengths defined by the macro  
 OP_LENGTHS, whose name must be OP_lengths. */  
   
 static uschar OP_lengths[] = { OP_LENGTHS };  
   
 #include "printint.c"  
123    
124    
125    
# Line 103  return(result); Line 150  return(result);
150    
151    
152    
 /*************************************************  
 *       Convert character value to UTF-8         *  
 *************************************************/  
   
 /* This function takes an integer value in the range 0 - 0x7fffffff  
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   buffer     pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
 */  
   
 static int  
 ord2utf8(int cvalue, unsigned char *buffer)  
 {  
 register int i, j;  
 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  
   if (cvalue <= utf8_table1[i]) break;  
 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  
 if (cvalue < 0) return -1;  
   
 buffer += i;  
 for (j = i; j > 0; j--)  
  {  
  *buffer-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *buffer = utf8_table2[i] | cvalue;  
 return i + 1;  
 }  
   
153    
154  /*************************************************  /*************************************************
155  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
# Line 155  Returns:   >  0 => the number of bytes c Line 166  Returns:   >  0 => the number of bytes c
166             -6 to 0 => malformed UTF-8 character at offset = (-return)             -6 to 0 => malformed UTF-8 character at offset = (-return)
167  */  */
168    
169  int  #if !defined NOUTF8
170    
171    static int
172  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *buffer, int *vptr)
173  {  {
174  int c = *buffer++;  int c = *buffer++;
# Line 186  for (j = 0; j < i; j++) Line 199  for (j = 0; j < i; j++)
199    
200  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
201    
202  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < utf8_table1_size; j++)
203    if (d <= utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
204  if (j != i) return -(i+1);  if (j != i) return -(i+1);
205    
# Line 196  if (j != i) return -(i+1); Line 209  if (j != i) return -(i+1);
209  return i+1;  return i+1;
210  }  }
211    
212    #endif
213    
214    
215    
216    /*************************************************
217    *       Convert character value to UTF-8         *
218    *************************************************/
219    
220    /* This function takes an integer value in the range 0 - 0x7fffffff
221    and encodes it as a UTF-8 character in 0 to 6 bytes.
222    
223    Arguments:
224      cvalue     the character value
225      buffer     pointer to buffer for result - at least 6 bytes long
226    
227    Returns:     number of characters placed in the buffer
228    */
229    
230    static int
231    ord2utf8(int cvalue, uschar *buffer)
232    {
233    register int i, j;
234    for (i = 0; i < utf8_table1_size; i++)
235      if (cvalue <= utf8_table1[i]) break;
236    buffer += i;
237    for (j = i; j > 0; j--)
238     {
239     *buffer-- = 0x80 | (cvalue & 0x3f);
240     cvalue >>= 6;
241     }
242    *buffer = utf8_table2[i] | cvalue;
243    return i + 1;
244    }
245    
246    
247    
248  /*************************************************  /*************************************************
# Line 208  chars without printing. */ Line 255  chars without printing. */
255    
256  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(unsigned char *p, int length, FILE *f)
257  {  {
258  int c;  int c = 0;
259  int yield = 0;  int yield = 0;
260    
261  while (length-- > 0)  while (length-- > 0)
262    {    {
263    if (utf8)  #if !defined NOUTF8
264      if (use_utf8)
265      {      {
266      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
267    
# Line 235  while (length-- > 0) Line 283  while (length-- > 0)
283        continue;        continue;
284        }        }
285      }      }
286    #endif
287    
288     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
289    
# Line 266  data is not zero. */ Line 315  data is not zero. */
315  static int callout(pcre_callout_block *cb)  static int callout(pcre_callout_block *cb)
316  {  {
317  FILE *f = (first_callout | callout_extra)? outfile : NULL;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
318  int i, pre_start, post_start;  int i, pre_start, post_start, subject_length;
319    
320  if (callout_extra)  if (callout_extra)
321    {    {
   int i;  
322    fprintf(f, "Callout %d: last capture = %d\n",    fprintf(f, "Callout %d: last capture = %d\n",
323      cb->callout_number, cb->capture_last);      cb->callout_number, cb->capture_last);
324    
# Line 298  pre_start = pchars((unsigned char *)cb-> Line 346  pre_start = pchars((unsigned char *)cb->
346  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
347    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
348    
349    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
350    
351  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  (void)pchars((unsigned char *)(cb->subject + cb->current_position),
352    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
353    
354  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
355    
356  /* Always print appropriate indicators, with callout number if not already  /* Always print appropriate indicators, with callout number if not already
357  shown */  shown. For automatic callouts, show the pattern offset. */
358    
359  if (callout_extra) fprintf(outfile, "    ");  if (cb->callout_number == 255)
360    else fprintf(outfile, "%3d ", cb->callout_number);    {
361      fprintf(outfile, "%+3d ", cb->pattern_position);
362      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
363      }
364    else
365      {
366      if (callout_extra) fprintf(outfile, "    ");
367        else fprintf(outfile, "%3d ", cb->callout_number);
368      }
369    
370  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
371  fprintf(outfile, "^");  fprintf(outfile, "^");
# Line 318  if (post_start > 0) Line 376  if (post_start > 0)
376    fprintf(outfile, "^");    fprintf(outfile, "^");
377    }    }
378    
379  fprintf(outfile, "\n");  for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
380      fprintf(outfile, " ");
381    
382    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
383      pbuffer + cb->pattern_position);
384    
385    fprintf(outfile, "\n");
386  first_callout = 0;  first_callout = 0;
387    
388  if ((int)(cb->callout_data) != 0)  if (cb->callout_data != NULL)
389    {    {
390    fprintf(outfile, "Callout data = %d\n", (int)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
391    return (int)(cb->callout_data);    if (callout_data != 0)
392        {
393        fprintf(outfile, "Callout data = %d\n", callout_data);
394        return callout_data;
395        }
396    }    }
397    
398  return (cb->callout_number != callout_fail_id)? 0 :  return (cb->callout_number != callout_fail_id)? 0 :
# Line 334  return (cb->callout_number != callout_fa Line 401  return (cb->callout_number != callout_fa
401    
402    
403  /*************************************************  /*************************************************
404  *            Local malloc function               *  *            Local malloc functions              *
405  *************************************************/  *************************************************/
406    
407  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
# Line 342  compiled re. */ Line 409  compiled re. */
409    
410  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
411  {  {
412    void *block = malloc(size);
413  gotten_store = size;  gotten_store = size;
414  return malloc(size);  if (show_malloc)
415      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
416    return block;
417    }
418    
419    static void new_free(void *block)
420    {
421    if (show_malloc)
422      fprintf(outfile, "free             %p\n", block);
423    free(block);
424  }  }
425    
426    
427    /* For recursion malloc/free, to test stacking calls */
428    
429    static void *stack_malloc(size_t size)
430    {
431    void *block = malloc(size);
432    if (show_malloc)
433      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
434    return block;
435    }
436    
437    static void stack_free(void *block)
438    {
439    if (show_malloc)
440      fprintf(outfile, "stack_free       %p\n", block);
441    free(block);
442    }
443    
444    
445  /*************************************************  /*************************************************
446  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
# Line 364  if ((rc = pcre_fullinfo(re, study, optio Line 458  if ((rc = pcre_fullinfo(re, study, optio
458    
459    
460  /*************************************************  /*************************************************
461    *         Byte flipping function                 *
462    *************************************************/
463    
/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value      the value whose low-order bytes are to be swapped
  n          2 to swap the low two bytes; any other value swaps the low four

Returns:     the byte-swapped value; bits above the swapped field are dropped

The work is done in unsigned arithmetic so that moving a byte whose top bit is
set into bits 24-31 cannot overflow a signed long, which would be undefined
behaviour on systems where long is 32 bits wide. */

static long int
byteflip(long int value, int n)
{
unsigned long int v = (unsigned long int)value;

if (n == 2)
  return (long int)(((v & 0x00ffUL) << 8) | ((v & 0xff00UL) >> 8));

return (long int)(((v & 0x000000ffUL) << 24) |
                  ((v & 0x0000ff00UL) <<  8) |
                  ((v & 0x00ff0000UL) >>  8) |
                  ((v & 0xff000000UL) >> 24));
}
473    
474    
475    
476    
477    /*************************************************
478  *                Main Program                    *  *                Main Program                    *
479  *************************************************/  *************************************************/
480    
# Line 382  int showinfo = 0; Line 493  int showinfo = 0;
493  int showstore = 0;  int showstore = 0;
494  int size_offsets = 45;  int size_offsets = 45;
495  int size_offsets_max;  int size_offsets_max;
496  int *offsets;  int *offsets = NULL;
497  #if !defined NOPOSIX  #if !defined NOPOSIX
498  int posix = 0;  int posix = 0;
499  #endif  #endif
500  int debug = 0;  int debug = 0;
501  int done = 0;  int done = 0;
502  unsigned char buffer[30000];  int all_use_dfa = 0;
503  unsigned char dbuffer[1024];  int yield = 0;
504    
505    unsigned char *buffer;
506    unsigned char *dbuffer;
507    
508  /* Static so that new_malloc can use it. */  /* Get buffers from malloc() so that Electric Fence will check their misuse
509    when I am debugging. */
510    
511    buffer = (unsigned char *)malloc(BUFFER_SIZE);
512    dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
513    pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
514    
515    /* The outfile variable is static so that new_malloc can use it. The _setmode()
516    stuff is some magic that I don't understand, but which apparently does good
517    things in Windows. It's related to line terminations.  */
518    
519    #if defined(_WIN32) || defined(WIN32)
520    _setmode( _fileno( stdout ), 0x8000 );
521    #endif  /* defined(_WIN32) || defined(WIN32) */
522    
523  outfile = stdout;  outfile = stdout;
524    
# Line 406  while (argc > 1 && argv[op][0] == '-') Line 533  while (argc > 1 && argv[op][0] == '-')
533    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
534    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
535    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
536    #if !defined NODFA
537      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
538    #endif
539    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
540        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
541          *endptr == 0))          *endptr == 0))
# Line 423  while (argc > 1 && argv[op][0] == '-') Line 553  while (argc > 1 && argv[op][0] == '-')
553      printf("Compiled with\n");      printf("Compiled with\n");
554      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
555      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
556        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
557        printf("  %sUnicode properties support\n", rc? "" : "No ");
558      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
559      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
560      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
# Line 431  while (argc > 1 && argv[op][0] == '-') Line 563  while (argc > 1 && argv[op][0] == '-')
563      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
564      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
565      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %d\n", rc);
566        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
567        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
568      exit(0);      exit(0);
569      }      }
570    else    else
# Line 438  while (argc > 1 && argv[op][0] == '-') Line 572  while (argc > 1 && argv[op][0] == '-')
572      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
573      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
574      printf("  -C     show PCRE compile-time options and exit\n");      printf("  -C     show PCRE compile-time options and exit\n");
575      printf("  -d     debug: show compiled code; implies -i\n"      printf("  -d     debug: show compiled code; implies -i\n");
576             "  -i     show information about compiled pattern\n"  #if !defined NODFA
577        printf("  -dfa   force DFA matching for all subjects\n");
578    #endif
579        printf("  -i     show information about compiled pattern\n"
580               "  -m     output memory used information\n"
581             "  -o <n> set size of offsets vector to <n>\n");             "  -o <n> set size of offsets vector to <n>\n");
582  #if !defined NOPOSIX  #if !defined NOPOSIX
583      printf("  -p     use POSIX interface\n");      printf("  -p     use POSIX interface\n");
584  #endif  #endif
585      printf("  -s     output store information\n"      printf("  -s     output store (memory) used information\n"
586             "  -t     time compilation and execution\n");             "  -t     time compilation and execution\n");
587      return 1;      yield = 1;
588        goto EXIT;
589      }      }
590    op++;    op++;
591    argc--;    argc--;
# Line 455  while (argc > 1 && argv[op][0] == '-') Line 594  while (argc > 1 && argv[op][0] == '-')
594  /* Get the store for the offsets vector, and remember what it was */  /* Get the store for the offsets vector, and remember what it was */
595    
596  size_offsets_max = size_offsets;  size_offsets_max = size_offsets;
597  offsets = malloc(size_offsets_max * sizeof(int));  offsets = (int *)malloc(size_offsets_max * sizeof(int));
598  if (offsets == NULL)  if (offsets == NULL)
599    {    {
600    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
601      size_offsets_max * sizeof(int));      size_offsets_max * sizeof(int));
602    return 1;    yield = 1;
603      goto EXIT;
604    }    }
605    
606  /* Sort out the input and output files */  /* Sort out the input and output files */
607    
608  if (argc > 1)  if (argc > 1)
609    {    {
610    infile = fopen(argv[op], "r");    infile = fopen(argv[op], "rb");
611    if (infile == NULL)    if (infile == NULL)
612      {      {
613      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
614      return 1;      yield = 1;
615        goto EXIT;
616      }      }
617    }    }
618    
619  if (argc > 2)  if (argc > 2)
620    {    {
621    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], "wb");
622    if (outfile == NULL)    if (outfile == NULL)
623      {      {
624      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
625      return 1;      yield = 1;
626        goto EXIT;
627      }      }
628    }    }
629    
630  /* Set alternative malloc function */  /* Set alternative malloc function */
631    
632  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
633    pcre_free = new_free;
634    pcre_stack_malloc = stack_malloc;
635    pcre_stack_free = stack_free;
636    
637  /* Heading line, then prompt for first regex if stdin */  /* Heading line, then prompt for first regex if stdin */
638    
# Line 507  while (!done) Line 652  while (!done)
652    
653    const char *error;    const char *error;
654    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
655      unsigned char *to_file = NULL;
656    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
657      unsigned long int true_size, true_study_size = 0;
658      size_t size, regex_gotten_store;
659    int do_study = 0;    int do_study = 0;
660    int do_debug = debug;    int do_debug = debug;
661    int do_G = 0;    int do_G = 0;
662    int do_g = 0;    int do_g = 0;
663    int do_showinfo = showinfo;    int do_showinfo = showinfo;
664    int do_showrest = 0;    int do_showrest = 0;
665      int do_flip = 0;
666    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
667    
668    utf8 = 0;    use_utf8 = 0;
669    
670    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
671    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
672    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
673    fflush(outfile);    fflush(outfile);
674    
# Line 527  while (!done) Line 676  while (!done)
676    while (isspace(*p)) p++;    while (isspace(*p)) p++;
677    if (*p == 0) continue;    if (*p == 0) continue;
678    
679    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
680    complete, read more. */  
681      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
682        {
683        unsigned long int magic;
684        uschar sbuf[8];
685        FILE *f;
686    
687        p++;
688        pp = p + (int)strlen((char *)p);
689        while (isspace(pp[-1])) pp--;
690        *pp = 0;
691    
692        f = fopen((char *)p, "rb");
693        if (f == NULL)
694          {
695          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
696          continue;
697          }
698    
699        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
700    
701        true_size =
702          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
703        true_study_size =
704          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
705    
706        re = (real_pcre *)new_malloc(true_size);
707        regex_gotten_store = gotten_store;
708    
709        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
710    
711        magic = ((real_pcre *)re)->magic_number;
712        if (magic != MAGIC_NUMBER)
713          {
714          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
715            {
716            do_flip = 1;
717            }
718          else
719            {
720            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
721            fclose(f);
722            continue;
723            }
724          }
725    
726        fprintf(outfile, "Compiled regex%s loaded from %s\n",
727          do_flip? " (byte-inverted)" : "", p);
728    
729        /* Need to know if UTF-8 for printing data strings */
730    
731        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
732        use_utf8 = (options & PCRE_UTF8) != 0;
733    
734        /* Now see if there is any following study data */
735    
736        if (true_study_size != 0)
737          {
738          pcre_study_data *psd;
739    
740          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
741          extra->flags = PCRE_EXTRA_STUDY_DATA;
742    
743          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
744          extra->study_data = psd;
745    
746          if (fread(psd, 1, true_study_size, f) != true_study_size)
747            {
748            FAIL_READ:
749            fprintf(outfile, "Failed to read data from %s\n", p);
750            if (extra != NULL) new_free(extra);
751            if (re != NULL) new_free(re);
752            fclose(f);
753            continue;
754            }
755          fprintf(outfile, "Study data loaded from %s\n", p);
756          do_study = 1;     /* To get the data output if requested */
757          }
758        else fprintf(outfile, "No study data\n");
759    
760        fclose(f);
761        goto SHOW_INFO;
762        }
763    
764      /* In-line pattern (the usual case). Get the delimiter and seek the end of
765      the pattern; if it isn't complete, read more. */
766    
767    delimiter = *p++;    delimiter = *p++;
768    
# Line 550  while (!done) Line 784  while (!done)
784        }        }
785      if (*pp != 0) break;      if (*pp != 0) break;
786    
787      len = sizeof(buffer) - (pp - buffer);      len = BUFFER_SIZE - (pp - buffer);
788      if (len < 256)      if (len < 256)
789        {        {
790        fprintf(outfile, "** Expression too long - missing delimiter?\n");        fprintf(outfile, "** Expression too long - missing delimiter?\n");
# Line 573  while (!done) Line 807  while (!done)
807    
808    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
809    
810    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
811      for callouts. */
812    
813    *pp++ = 0;    *pp++ = 0;
814      strcpy((char *)pbuffer, (char *)p);
815    
816    /* Look for options after final delimiter */    /* Look for options after final delimiter */
817    
# Line 587  while (!done) Line 823  while (!done)
823      {      {
824      switch (*pp++)      switch (*pp++)
825        {        {
826          case 'f': options |= PCRE_FIRSTLINE; break;
827        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
828        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
829        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 595  while (!done) Line 832  while (!done)
832    
833        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
834        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
835          case 'C': options |= PCRE_AUTO_CALLOUT; break;
836        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
837        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
838          case 'F': do_flip = 1; break;
839        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
840        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
841        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
# Line 609  while (!done) Line 848  while (!done)
848        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
849        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
850        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
851        case '8': options |= PCRE_UTF8; utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
852          case '?': options |= PCRE_NO_UTF8_CHECK; break;
853    
854        case 'L':        case 'L':
855        ppp = pp;        ppp = pp;
856        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows */
857          while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
858        *ppp = 0;        *ppp = 0;
859        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
860          {          {
# Line 624  while (!done) Line 865  while (!done)
865        pp = ppp;        pp = ppp;
866        break;        break;
867    
868        case '\n': case ' ': break;        case '>':
869          to_file = pp;
870          while (*pp != 0) pp++;
871          while (isspace(pp[-1])) pp--;
872          *pp = 0;
873          break;
874    
875          case '\r':                      /* So that it works in Windows */
876          case '\n':
877          case ' ':
878          break;
879    
880        default:        default:
881        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
882        goto SKIP_DATA;        goto SKIP_DATA;
# Line 640  while (!done) Line 892  while (!done)
892      {      {
893      int rc;      int rc;
894      int cflags = 0;      int cflags = 0;
895    
896      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
897      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
898        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
899      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
900    
901      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 649  while (!done) Line 903  while (!done)
903    
904      if (rc != 0)      if (rc != 0)
905        {        {
906        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
907        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
908        goto SKIP_DATA;        goto SKIP_DATA;
909        }        }
# Line 690  while (!done) Line 944  while (!done)
944          {          {
945          for (;;)          for (;;)
946            {            {
947            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
948              {              {
949              done = 1;              done = 1;
950              goto CONTINUE;              goto CONTINUE;
# Line 714  while (!done) Line 968  while (!done)
968                sizeof(real_pcre) -                sizeof(real_pcre) -
969                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
970    
971        /* Extract the size for possible writing before possibly flipping it,
972        and remember the store that was got. */
973    
974        true_size = ((real_pcre *)re)->size;
975        regex_gotten_store = gotten_store;
976    
977        /* If /S was present, study the regexp to generate additional info to
978        help with the matching. */
979    
980        if (do_study)
981          {
982          if (timeit)
983            {
984            register int i;
985            clock_t time_taken;
986            clock_t start_time = clock();
987            for (i = 0; i < LOOPREPEAT; i++)
988              extra = pcre_study(re, study_options, &error);
989            time_taken = clock() - start_time;
990            if (extra != NULL) free(extra);
991            fprintf(outfile, "  Study time %.3f milliseconds\n",
992              (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
993                (double)CLOCKS_PER_SEC);
994            }
995          extra = pcre_study(re, study_options, &error);
996          if (error != NULL)
997            fprintf(outfile, "Failed to study: %s\n", error);
998          else if (extra != NULL)
999            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1000          }
1001    
1002        /* If the 'F' option was present, we flip the bytes of all the integer
1003        fields in the regex data block and the study block. This is to make it
1004        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1005        compiled on a different architecture. */
1006    
1007        if (do_flip)
1008          {
1009          real_pcre *rre = (real_pcre *)re;
1010          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1011          rre->size = byteflip(rre->size, sizeof(rre->size));
1012          rre->options = byteflip(rre->options, sizeof(rre->options));
1013          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1014          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1015          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1016          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1017          rre->name_table_offset = byteflip(rre->name_table_offset,
1018            sizeof(rre->name_table_offset));
1019          rre->name_entry_size = byteflip(rre->name_entry_size,
1020            sizeof(rre->name_entry_size));
1021          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1022    
1023          if (extra != NULL)
1024            {
1025            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1026            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1027            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1028            }
1029          }
1030    
1031        /* Extract information from the compiled data if required */
1032    
1033        SHOW_INFO:
1034    
1035      if (do_showinfo)      if (do_showinfo)
1036        {        {
1037        unsigned long int get_options;        unsigned long int get_options, all_options;
1038    #if !defined NOINFOCHECK
1039        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1040    #endif
1041        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char;
1042        int nameentrysize, namecount;        int nameentrysize, namecount;
1043        const uschar *nametable;        const uschar *nametable;
       size_t size;  
1044    
1045        if (do_debug)        if (do_debug)
1046          {          {
1047          fprintf(outfile, "------------------------------------------------------------------\n");          fprintf(outfile, "------------------------------------------------------------------\n");
1048          print_internals(re, outfile);          pcre_printint(re, outfile);
1049          }          }
1050    
1051        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
# Line 737  while (!done) Line 1056  while (!done)
1056        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1057        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1058        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1059        new_info(re, NULL, PCRE_INFO_NAMETABLE, &nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1060    
1061    #if !defined NOINFOCHECK
1062        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1063        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1064          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 756  while (!done) Line 1076  while (!done)
1076            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1077              get_options, old_options);              get_options, old_options);
1078          }          }
1079    #endif
1080    
1081        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1082          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1083          size, gotten_store);          (int)size, (int)regex_gotten_store);
1084    
1085        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1086        if (backrefmax > 0)        if (backrefmax > 0)
# Line 777  while (!done) Line 1098  while (!done)
1098            }            }
1099          }          }
1100    
1101          /* The NOPARTIAL bit is a private bit in the options, so we have
1102          to fish it out via our back door */
1103    
1104          all_options = ((real_pcre *)re)->options;
1105          if (do_flip)
1106            {
1107            all_options = byteflip(all_options, sizeof(all_options));
1108            }
1109    
1110          if ((all_options & PCRE_NOPARTIAL) != 0)
1111            fprintf(outfile, "Partial matching not supported\n");
1112    
1113        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1114          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",
1115            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1116            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1117            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1118            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1119              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1120            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1121            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1122            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1123            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1124            ((get_options & PCRE_UTF8) != 0)? " utf8" : "");            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1125              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1126    
1127        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1128          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Case state changes\n");
# Line 803  while (!done) Line 1138  while (!done)
1138        else        else
1139          {          {
1140          int ch = first_char & 255;          int ch = first_char & 255;
1141          char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1142            "" : " (caseless)";            "" : " (caseless)";
1143          if (isprint(ch))          if (isprint(ch))
1144            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
# Line 818  while (!done) Line 1153  while (!done)
1153        else        else
1154          {          {
1155          int ch = need_char & 255;          int ch = need_char & 255;
1156          char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1157            "" : " (caseless)";            "" : " (caseless)";
1158          if (isprint(ch))          if (isprint(ch))
1159            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1160          else          else
1161            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1162          }          }
       }  
1163    
1164      /* If /S was present, study the regexp to generate additional info to        /* Don't output study size; at present it is in any case a fixed
1165      help with the matching. */        value, but it varies, depending on the computer architecture, and
1166          so messes up the test suite. (And with the /F option, it might be
1167          flipped.) */
1168    
1169      if (do_study)        if (do_study)
       {  
       if (timeit)  
1170          {          {
1171          register int i;          if (extra == NULL)
1172          clock_t time_taken;            fprintf(outfile, "Study returned NULL\n");
1173          clock_t start_time = clock();          else
1174          for (i = 0; i < LOOPREPEAT; i++)            {
1175            extra = pcre_study(re, study_options, &error);            uschar *start_bits = NULL;
1176          time_taken = clock() - start_time;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1177          if (extra != NULL) free(extra);  
1178          fprintf(outfile, "  Study time %.3f milliseconds\n",            if (start_bits == NULL)
1179            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /              fprintf(outfile, "No starting byte set\n");
1180              (double)CLOCKS_PER_SEC);            else
1181                {
1182                int i;
1183                int c = 24;
1184                fprintf(outfile, "Starting byte set: ");
1185                for (i = 0; i < 256; i++)
1186                  {
1187                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1188                    {
1189                    if (c > 75)
1190                      {
1191                      fprintf(outfile, "\n  ");
1192                      c = 2;
1193                      }
1194                    if (isprint(i) && i != ' ')
1195                      {
1196                      fprintf(outfile, "%c ", i);
1197                      c += 2;
1198                      }
1199                    else
1200                      {
1201                      fprintf(outfile, "\\x%02x ", i);
1202                      c += 5;
1203                      }
1204                    }
1205                  }
1206                fprintf(outfile, "\n");
1207                }
1208              }
1209          }          }
1210          }
1211    
1212        extra = pcre_study(re, study_options, &error);      /* If the '>' option was present, we write out the regex to a file, and
1213        if (error != NULL)      that is all. The first 8 bytes of the file are the regex length and then
1214          fprintf(outfile, "Failed to study: %s\n", error);      the study length, in big-endian order. */
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
1215    
1216        else if (do_showinfo)      if (to_file != NULL)
1217          {
1218          FILE *f = fopen((char *)to_file, "wb");
1219          if (f == NULL)
1220            {
1221            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1222            }
1223          else
1224          {          {
1225          size_t size;          uschar sbuf[8];
1226          uschar *start_bits = NULL;          sbuf[0] = (true_size >> 24)  & 255;
1227          new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);          sbuf[1] = (true_size >> 16)  & 255;
1228          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          sbuf[2] = (true_size >>  8)  & 255;
1229          fprintf(outfile, "Study size = %d\n", size);          sbuf[3] = (true_size)  & 255;
1230          if (start_bits == NULL)  
1231            fprintf(outfile, "No starting character set\n");          sbuf[4] = (true_study_size >> 24)  & 255;
1232            sbuf[5] = (true_study_size >> 16)  & 255;
1233            sbuf[6] = (true_study_size >>  8)  & 255;
1234            sbuf[7] = (true_study_size)  & 255;
1235    
1236            if (fwrite(sbuf, 1, 8, f) < 8 ||
1237                fwrite(re, 1, true_size, f) < true_size)
1238              {
1239              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1240              }
1241          else          else
1242            {            {
1243            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1244            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1245              {              {
1246              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1247                    true_study_size)
1248                {                {
1249                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1250                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1251                }                }
1252                else fprintf(outfile, "Study data written to %s\n", to_file);
1253              }              }
           fprintf(outfile, "\n");  
1254            }            }
1255            fclose(f);
1256          }          }
1257    
1258          new_free(re);
1259          if (extra != NULL) new_free(extra);
1260          if (tables != NULL) new_free((void *)tables);
1261          continue;  /* With next regex */
1262        }        }
1263      }      }        /* End of non-POSIX compile */
1264    
1265    /* Read data lines and test them */    /* Read data lines and test them */
1266    
# Line 911  while (!done) Line 1280  while (!done)
1280      int gmatched = 0;      int gmatched = 0;
1281      int start_offset = 0;      int start_offset = 0;
1282      int g_notempty = 0;      int g_notempty = 0;
1283        int use_dfa = 0;
1284    
1285      options = 0;      options = 0;
1286    
# Line 920  while (!done) Line 1290  while (!done)
1290      callout_count = 0;      callout_count = 0;
1291      callout_fail_count = 999999;      callout_fail_count = 999999;
1292      callout_fail_id = -1;      callout_fail_id = -1;
1293        show_malloc = 0;
1294    
1295      if (infile == stdin) printf("data> ");      if (infile == stdin) printf("data> ");
1296      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1297        {        {
1298        done = 1;        done = 1;
1299        goto CONTINUE;        goto CONTINUE;
# Line 965  while (!done) Line 1336  while (!done)
1336    
1337          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
1338    
1339    #if !defined NOUTF8
1340          if (*p == '{')          if (*p == '{')
1341            {            {
1342            unsigned char *pt = p;            unsigned char *pt = p;
# Line 973  while (!done) Line 1345  while (!done)
1345              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1346            if (*pt == '}')            if (*pt == '}')
1347              {              {
1348              unsigned char buffer[8];              unsigned char buff8[8];
1349              int ii, utn;              int ii, utn;
1350              utn = ord2utf8(c, buffer);              utn = ord2utf8(c, buff8);
1351              for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1352              c = buffer[ii];   /* Last byte */              c = buff8[ii];   /* Last byte */
1353              p = pt + 1;              p = pt + 1;
1354              break;              break;
1355              }              }
1356            /* Not correct form; fall through */            /* Not correct form; fall through */
1357            }            }
1358    #endif
1359    
1360          /* Ordinary \x */          /* Ordinary \x */
1361    
# Line 994  while (!done) Line 1367  while (!done)
1367            }            }
1368          break;          break;
1369    
1370          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1371          p--;          p--;
1372          continue;          continue;
1373    
1374            case '>':
1375            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1376            continue;
1377    
1378          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1379          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1380          continue;          continue;
# Line 1015  while (!done) Line 1392  while (!done)
1392          else if (isalnum(*p))          else if (isalnum(*p))
1393            {            {
1394            uschar name[256];            uschar name[256];
1395            uschar *pp = name;            uschar *npp = name;
1396            while (isalnum(*p)) *pp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1397            *pp = 0;            *npp = 0;
1398            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)name);
1399            if (n < 0)            if (n < 0)
1400              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", name);
# Line 1059  while (!done) Line 1436  while (!done)
1436            }            }
1437          continue;          continue;
1438    
1439    #if !defined NODFA
1440            case 'D':
1441    #if !defined NOPOSIX
1442            if (posix || do_posix)
1443              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1444            else
1445    #endif
1446              use_dfa = 1;
1447            continue;
1448    
1449            case 'F':
1450            options |= PCRE_DFA_SHORTEST;
1451            continue;
1452    #endif
1453    
1454          case 'G':          case 'G':
1455          if (isdigit(*p))          if (isdigit(*p))
1456            {            {
# Line 1068  while (!done) Line 1460  while (!done)
1460          else if (isalnum(*p))          else if (isalnum(*p))
1461            {            {
1462            uschar name[256];            uschar name[256];
1463            uschar *pp = name;            uschar *npp = name;
1464            while (isalnum(*p)) *pp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1465            *pp = 0;            *npp = 0;
1466            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)name);
1467            if (n < 0)            if (n < 0)
1468              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", name);
# Line 1096  while (!done) Line 1488  while (!done)
1488            {            {
1489            size_offsets_max = n;            size_offsets_max = n;
1490            free(offsets);            free(offsets);
1491            use_offsets = offsets = malloc(size_offsets_max * sizeof(int));            use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1492            if (offsets == NULL)            if (offsets == NULL)
1493              {              {
1494              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1495                size_offsets_max * sizeof(int));                size_offsets_max * sizeof(int));
1496              return 1;              yield = 1;
1497                goto EXIT;
1498              }              }
1499            }            }
1500          use_size_offsets = n;          use_size_offsets = n;
1501          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1502          continue;          continue;
1503    
1504            case 'P':
1505            options |= PCRE_PARTIAL;
1506            continue;
1507    
1508    #if !defined NODFA
1509            case 'R':
1510            options |= PCRE_DFA_RESTART;
1511            continue;
1512    #endif
1513    
1514            case 'S':
1515            show_malloc = 1;
1516            continue;
1517    
1518          case 'Z':          case 'Z':
1519          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1520          continue;          continue;
1521    
1522            case '?':
1523            options |= PCRE_NO_UTF8_CHECK;
1524            continue;
1525          }          }
1526        *q++ = c;        *q++ = c;
1527        }        }
1528      *q = 0;      *q = 0;
1529      len = q - dbuffer;      len = q - dbuffer;
1530    
1531        if ((all_use_dfa || use_dfa) && find_match_limit)
1532          {
1533          printf("**Match limit not relevant for DFA matching: ignored\n");
1534          find_match_limit = 0;
1535          }
1536    
1537      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1538      support timing or playing with the match limit or callout data. */      support timing or playing with the match limit or callout data. */
1539    
# Line 1127  while (!done) Line 1544  while (!done)
1544        int eflags = 0;        int eflags = 0;
1545        regmatch_t *pmatch = NULL;        regmatch_t *pmatch = NULL;
1546        if (use_size_offsets > 0)        if (use_size_offsets > 0)
1547          pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1548        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1549        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1550    
# Line 1135  while (!done) Line 1552  while (!done)
1552    
1553        if (rc != 0)        if (rc != 0)
1554          {          {
1555          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1556          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1557          }          }
1558        else        else
# Line 1174  while (!done) Line 1591  while (!done)
1591          register int i;          register int i;
1592          clock_t time_taken;          clock_t time_taken;
1593          clock_t start_time = clock();          clock_t start_time = clock();
1594    
1595    #if !defined NODFA
1596            if (all_use_dfa || use_dfa)
1597              {
1598              int workspace[1000];
1599              for (i = 0; i < LOOPREPEAT; i++)
1600                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1601                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1602                  sizeof(workspace)/sizeof(int));
1603              }
1604            else
1605    #endif
1606    
1607          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1608            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1609              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1610    
1611          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1612          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1613            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
# Line 1194  while (!done) Line 1625  while (!done)
1625    
1626          if (extra == NULL)          if (extra == NULL)
1627            {            {
1628            extra = malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1629            extra->flags = 0;            extra->flags = 0;
1630            }            }
1631          extra->flags |= PCRE_EXTRA_MATCH_LIMIT;          extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
# Line 1210  while (!done) Line 1641  while (!done)
1641              min = mid;              min = mid;
1642              mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;              mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1643              }              }
1644            else if (count >= 0 || count == PCRE_ERROR_NOMATCH)            else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1645                                     count == PCRE_ERROR_PARTIAL)
1646              {              {
1647              if (mid == min + 1)              if (mid == min + 1)
1648                {                {
# Line 1233  while (!done) Line 1665  while (!done)
1665          {          {
1666          if (extra == NULL)          if (extra == NULL)
1667            {            {
1668            extra = malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1669            extra->flags = 0;            extra->flags = 0;
1670            }            }
1671          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1672          extra->callout_data = (void *)callout_data;          extra->callout_data = &callout_data;
1673          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1674            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
1675          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
# Line 1246  while (!done) Line 1678  while (!done)
1678        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
1679        value of match_limit. */        value of match_limit. */
1680    
1681        else count = pcre_exec(re, extra, (char *)bptr, len,  #if !defined NODFA
1682          start_offset, options | g_notempty, use_offsets, use_size_offsets);        else if (all_use_dfa || use_dfa)
1683            {
1684            int workspace[1000];
1685            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1686              options | g_notempty, use_offsets, use_size_offsets, workspace,
1687              sizeof(workspace)/sizeof(int));
1688            if (count == 0)
1689              {
1690              fprintf(outfile, "Matched, but too many subsidiary matches\n");
1691              count = use_size_offsets/2;
1692              }
1693            }
1694    #endif
1695    
1696        if (count == 0)        else
1697          {          {
1698          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
1699          count = use_size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
1700            if (count == 0)
1701              {
1702              fprintf(outfile, "Matched, but too many substrings\n");
1703              count = use_size_offsets/3;
1704              }
1705          }          }
1706    
1707        /* Matched */        /* Matched */
# Line 1334  while (!done) Line 1783  while (!done)
1783            }            }
1784          }          }
1785    
1786          /* There was a partial match */
1787    
1788          else if (count == PCRE_ERROR_PARTIAL)
1789            {
1790            fprintf(outfile, "Partial match");
1791    #if !defined NODFA
1792            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
1793              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
1794                bptr + use_offsets[0]);
1795    #endif
1796            fprintf(outfile, "\n");
1797            break;  /* Out of the /g loop */
1798            }
1799    
1800        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
1801        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end.
1802        We want to advance the start offset, and continue. Fudge the offset        We want to advance the start offset, and continue. In the case of UTF-8
1803        values to achieve this. We won't be at the end of the string - that        matching, the advance must be one character, not one byte. Fudge the
1804        was checked before setting g_notempty. */        offset values to achieve this. We won't be at the end of the string -
1805          that was checked before setting g_notempty. */
1806    
1807        else        else
1808          {          {
1809          if (g_notempty != 0)          if (g_notempty != 0)
1810            {            {
1811              int onechar = 1;
1812            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
1813            use_offsets[1] = start_offset + 1;            if (use_utf8)
1814                {
1815                while (start_offset + onechar < len)
1816                  {
1817                  int tb = bptr[start_offset+onechar];
1818                  if (tb <= 127) break;
1819                  tb &= 0xc0;
1820                  if (tb != 0 && tb != 0xc0) onechar++;
1821                  }
1822                }
1823              use_offsets[1] = start_offset + onechar;
1824            }            }
1825          else          else
1826            {            {
1827            if (gmatched == 0)   /* Error if no previous matches */            if (count == PCRE_ERROR_NOMATCH)
1828              {              {
1829              if (count == -1) fprintf(outfile, "No match\n");              if (gmatched == 0) fprintf(outfile, "No match\n");
               else fprintf(outfile, "Error %d\n", count);  
1830              }              }
1831              else fprintf(outfile, "Error %d\n", count);
1832            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
1833            }            }
1834          }          }
# Line 1396  while (!done) Line 1871  while (!done)
1871    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1872  #endif  #endif
1873    
1874    if (re != NULL) free(re);    if (re != NULL) new_free(re);
1875    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
1876    if (tables != NULL)    if (tables != NULL)
1877      {      {
1878      free((void *)tables);      new_free((void *)tables);
1879      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
1880      }      }
1881    }    }
1882    
1883  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
1884  return 0;  
1885    EXIT:
1886    
1887    if (infile != NULL && infile != stdin) fclose(infile);
1888    if (outfile != NULL && outfile != stdout) fclose(outfile);
1889    
1890    free(buffer);
1891    free(dbuffer);
1892    free(pbuffer);
1893    free(offsets);
1894    
1895    return yield;
1896  }  }
1897    
1898  /* End */  /* End of pcretest.c */

Legend:
Removed from v.65  
changed lines
  Added in v.85

  ViewVC Help
Powered by ViewVC 1.1.5