/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 79 by nigel, Sat Feb 24 21:40:52 2007 UTC revision 107 by ph10, Wed Mar 7 11:02:28 2007 UTC
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 44  POSSIBILITY OF SUCH DAMAGE.
44  #include <locale.h>  #include <locale.h>
45  #include <errno.h>  #include <errno.h>
46    
47    
48    /* A number of things vary for Windows builds. Originally, pcretest opened its
49    input and output without "b"; then I was told that "b" was needed in some
50    environments, so it was added for release 5.0 to both the input and output. (It
51    makes no difference on Unix-like systems.) Later I was told that it is wrong
52    for the input on Windows. I've now abstracted the modes into two macros that
53    are set here, to make it easier to fiddle with them, and removed "b" from the
54    input mode under Windows. */
55    
56    #if defined(_WIN32) || defined(WIN32)
57    #include <io.h>                /* For _setmode() */
58    #include <fcntl.h>             /* For _O_BINARY */
59    #define INPUT_MODE   "r"
60    #define OUTPUT_MODE  "wb"
61    
62    #else
63    #include <sys/time.h>          /* These two includes are needed */
64    #include <sys/resource.h>      /* for setrlimit(). */
65    #define INPUT_MODE   "rb"
66    #define OUTPUT_MODE  "wb"
67    #endif
68    
69    
70  #define PCRE_SPY        /* For Win32 build, import data, not export */  #define PCRE_SPY        /* For Win32 build, import data, not export */
71    
72  /* We need the internal info for displaying the results of pcre_study() and  /* We include pcre_internal.h because we need the internal info for displaying
73  other internal data; pcretest also uses some of the fixed tables, and generally  the results of pcre_study() and we also need to know about the internal
74  has "inside information" compared to a program that strictly follows the PCRE  macros, structures, and other internal data values; pcretest has "inside
75  API. */  information" compared to a program that strictly follows the PCRE API. */
76    
77  #include "pcre_internal.h"  #include "pcre_internal.h"
78    
79    /* We need access to the data tables that PCRE uses. So as not to have to keep
80    two copies, we include the source file here, changing the names of the external
81    symbols to prevent clashes. */
82    
83    #define _pcre_utf8_table1      utf8_table1
84    #define _pcre_utf8_table1_size utf8_table1_size
85    #define _pcre_utf8_table2      utf8_table2
86    #define _pcre_utf8_table3      utf8_table3
87    #define _pcre_utf8_table4      utf8_table4
88    #define _pcre_utt              utt
89    #define _pcre_utt_size         utt_size
90    #define _pcre_OP_lengths       OP_lengths
91    
92    #include "pcre_tables.c"
93    
94    /* We also need the pcre_printint() function for printing out compiled
95    patterns. This function is in a separate file so that it can be included in
96    pcre_compile.c when that module is compiled with debugging enabled.
97    
98    The definition of the macro PRINTABLE, which determines whether to print an
99    output character as-is or as a hex value when showing compiled patterns, is
100    contained in this file. We use it here also, in cases when the locale has not
101    been explicitly changed, so as to get consistent output from systems that
102    differ in their output from isprint() even in the "C" locale. */
103    
104    #include "pcre_printint.src"
105    
106    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
107    
108    
109  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
110  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 62  Makefile. */ Line 114  Makefile. */
114  #include "pcreposix.h"  #include "pcreposix.h"
115  #endif  #endif
116    
117  /* It is also possible, for the benefit of the version imported into Exim, to  /* It is also possible, for the benefit of the version currently imported into
118  build pcretest without support for UTF8 (define NOUTF8), without the interface  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
119  to the DFA matcher (NODFA), and without the doublecheck of the old "info"  interface to the DFA matcher (NODFA), and without the doublecheck of the old
120  function (define NOINFOCHECK). */  "info" function (define NOINFOCHECK). In fact, we automatically cut out the
121    UTF8 support if PCRE is built without it. */
122    
123    #ifndef SUPPORT_UTF8
124    #ifndef NOUTF8
125    #define NOUTF8
126    #endif
127    #endif
128    
129    
130    /* Other parameters */
131    
132  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
133  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 76  function (define NOINFOCHECK). */ Line 137  function (define NOINFOCHECK). */
137  #endif  #endif
138  #endif  #endif
139    
140  #define LOOPREPEAT 500000  /* This is the default loop count for timing. */
141    
142  #define BUFFER_SIZE 30000  #define LOOPREPEAT 500000
 #define PBUFFER_SIZE BUFFER_SIZE  
 #define DBUFFER_SIZE BUFFER_SIZE  
143    
144    /* Static variables */
145    
146  static FILE *outfile;  static FILE *outfile;
147  static int log_store = 0;  static int log_store = 0;
# Line 90  static int callout_extra; Line 150  static int callout_extra;
150  static int callout_fail_count;  static int callout_fail_count;
151  static int callout_fail_id;  static int callout_fail_id;
152  static int first_callout;  static int first_callout;
153    static int locale_set = 0;
154  static int show_malloc;  static int show_malloc;
155  static int use_utf8;  static int use_utf8;
156  static size_t gotten_store;  static size_t gotten_store;
157    
158    /* The buffers grow automatically if very long input lines are encountered. */
159    
160    static int buffer_size = 50000;
161    static uschar *buffer = NULL;
162    static uschar *dbuffer = NULL;
163  static uschar *pbuffer = NULL;  static uschar *pbuffer = NULL;
164    
165    
166    
167  /*************************************************  /*************************************************
168    *        Read or extend an input line            *
169    *************************************************/
170    
171    /* Input lines are read into buffer, but both patterns and data lines can be
172    continued over multiple input lines. In addition, if the buffer fills up, we
173    want to automatically expand it so as to be able to handle extremely large
174    lines that are needed for certain stress tests. When the input buffer is
175    expanded, the other two buffers must also be expanded likewise, and the
176    contents of pbuffer, which are a copy of the input for callouts, must be
177    preserved (for when expansion happens for a data line). This is not the most
178    optimal way of handling this, but hey, this is just a test program!
179    
180    Arguments:
181      f            the file to read
182      start        where in buffer to start (this *must* be within buffer)
183    
184    Returns:       pointer to the start of new data
185                   could be a copy of start, or could be moved
186                   NULL if no data read and EOF reached
187    */
188    
189    static uschar *
190    extend_inputline(FILE *f, uschar *start)
191    {
       /* Reads from f into the global buffer beginning at start, appending
       successive fgets() chunks until a chunk ends in a newline or fgets()
       returns NULL. The three global buffers are doubled in size whenever 1000
       bytes or fewer remain, so the returned pointer may lie in a newly
       allocated buffer. */
192    uschar *here = start;
193    
194    for (;;)
195      {
       /* Bytes left between the current write position and the end of buffer. */
196      int rlen = buffer_size - (here - buffer);
197    
       /* Only read while more than 1000 bytes remain; otherwise grow first. */
198      if (rlen > 1000)
199        {
200        int dlen;
       /* fgets() yielded nothing more: report NULL only if this call has read
       no data at all, otherwise hand back what was accumulated. */
201        if (fgets((char *)here, rlen,  f) == NULL)
202          return (here == start)? NULL : start;
203        dlen = (int)strlen((char *)here);
       /* A trailing newline means a complete line is now in the buffer. */
204        if (dlen > 0 && here[dlen - 1] == '\n') return start;
       /* No newline yet: the next chunk is appended after this one. */
205        here += dlen;
206        }
207    
208      else
209        {
       /* Double buffer, dbuffer and pbuffer together. */
210        int new_buffer_size = 2*buffer_size;
211        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
212        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
213        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
214    
       /* Allocation failure is fatal: report on stderr and exit. */
215        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
216          {
217          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
218          exit(1);
219          }
220    
       /* The contents of buffer and pbuffer are carried over; the old contents
       of dbuffer are not copied. */
221        memcpy(new_buffer, buffer, buffer_size);
222        memcpy(new_pbuffer, pbuffer, buffer_size);
223    
224        buffer_size = new_buffer_size;
225    
       /* Rebase both pointers onto the new buffer while the old buffer address
       is still available for computing their offsets. */
226        start = new_buffer + (start - buffer);
227        here = new_buffer + (here - buffer);
228    
229        free(buffer);
230        free(dbuffer);
231        free(pbuffer);
232    
233        buffer = new_buffer;
234        dbuffer = new_dbuffer;
235        pbuffer = new_pbuffer;
236        }
237      }
238    
239    return NULL;  /* Control never gets here */
240    }
241    
242    
243    
244    
245    
246    
247    
248    /*************************************************
249  *          Read number from string               *  *          Read number from string               *
250  *************************************************/  *************************************************/
251    
252  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
253  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
254  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
255    
256  Arguments:  Arguments:
257    str           string to be converted    str           string to be converted
# Line 134  return(result); Line 281  return(result);
281  and returns the value of the character.  and returns the value of the character.
282    
283  Argument:  Argument:
284    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
285    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
286    
287  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
288             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
289  */  */
290    
291  #if !defined NOUTF8  #if !defined NOUTF8
292    
293  static int  static int
294  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *utf8bytes, int *vptr)
295  {  {
296  int c = *buffer++;  int c = *utf8bytes++;
297  int d = c;  int d = c;
298  int i, j, s;  int i, j, s;
299    
# Line 162  if (i == 0 || i == 6) return 0;        / Line 309  if (i == 0 || i == 6) return 0;        /
309  /* i now has a value in the range 1-5 */  /* i now has a value in the range 1-5 */
310    
311  s = 6*i;  s = 6*i;
312  d = (c & _pcre_utf8_table3[i]) << s;  d = (c & utf8_table3[i]) << s;
313    
314  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
315    {    {
316    c = *buffer++;    c = *utf8bytes++;
317    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
318    s -= 6;    s -= 6;
319    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 174  for (j = 0; j < i; j++) Line 321  for (j = 0; j < i; j++)
321    
322  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
323    
324  for (j = 0; j < _pcre_utf8_table1_size; j++)  for (j = 0; j < utf8_table1_size; j++)
325    if (d <= _pcre_utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
326  if (j != i) return -(i+1);  if (j != i) return -(i+1);
327    
328  /* Valid value */  /* Valid value */
# Line 189  return i+1; Line 336  return i+1;
336    
337    
338  /*************************************************  /*************************************************
339    *       Convert character value to UTF-8         *
340    *************************************************/
341    
342    /* This function takes an integer value in the range 0 - 0x7fffffff
343    and encodes it as a UTF-8 character in 1 to 6 bytes.
344    
345    Arguments:
346      cvalue     the character value
347      utf8bytes  pointer to buffer for result - at least 6 bytes long
348    
349    Returns:     number of bytes placed in the buffer
350    */
351    
352    #if !defined NOUTF8
353    
354    static int
355    ord2utf8(int cvalue, uschar *utf8bytes)
356    {
       /* Writes the encoding of cvalue into utf8bytes and returns the number of
       bytes written (i + 1, so always at least one). */
357    register int i, j;
       /* Find the first utf8_table1 entry that is >= cvalue; its index i is the
       number of bytes that follow the first byte. Presumably utf8_table1 holds
       the largest value for each encoding length - the table is defined in
       pcre_tables.c, not visible here. */
358    for (i = 0; i < utf8_table1_size; i++)
359      if (cvalue <= utf8_table1[i]) break;
       /* NOTE(review): if cvalue is larger than every table entry, the loop ends
       with i == utf8_table1_size and that index is used below - confirm callers
       never pass such a value. */
       /* Point at the last byte and fill backwards, six bits per byte, each
       with the 0x80 bit set. */
360    utf8bytes += i;
361    for (j = i; j > 0; j--)
362     {
363     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
364     cvalue >>= 6;
365     }
       /* First byte: the remaining high bits of cvalue combined with
       utf8_table2[i] (presumably the length marker bits - verify against
       pcre_tables.c). */
366    *utf8bytes = utf8_table2[i] | cvalue;
367    return i + 1;
368    }
369    
370    #endif
371    
372    
373    
374    /*************************************************
375  *             Print character string             *  *             Print character string             *
376  *************************************************/  *************************************************/
377    
# Line 198  chars without printing. */ Line 381  chars without printing. */
381    
382  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(unsigned char *p, int length, FILE *f)
383  {  {
384  int c;  int c = 0;
385  int yield = 0;  int yield = 0;
386    
387  while (length-- > 0)  while (length-- > 0)
# Line 212  while (length-- > 0) Line 395  while (length-- > 0)
395        {        {
396        length -= rc - 1;        length -= rc - 1;
397        p += rc;        p += rc;
398        if (c < 256 && isprint(c))        if (PRINTHEX(c))
399          {          {
400          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
401          yield++;          yield++;
402          }          }
403        else        else
404          {          {
405          int n;          int n = 4;
406          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
407          yield += n;          yield += (n <= 0x000000ff)? 2 :
408                     (n <= 0x00000fff)? 3 :
409                     (n <= 0x0000ffff)? 4 :
410                     (n <= 0x000fffff)? 5 : 6;
411          }          }
412        continue;        continue;
413        }        }
# Line 230  while (length-- > 0) Line 416  while (length-- > 0)
416    
417     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
418    
419    if (isprint(c = *(p++)))    c = *p++;
420      if (PRINTHEX(c))
421      {      {
422      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
423      yield++;      yield++;
# Line 404  if ((rc = pcre_fullinfo(re, study, optio Line 591  if ((rc = pcre_fullinfo(re, study, optio
591  *         Byte flipping function                 *  *         Byte flipping function                 *
592  *************************************************/  *************************************************/
593    
594  static long int  static unsigned long int
595  byteflip(long int value, int n)  byteflip(unsigned long int value, int n)
596  {  {
597  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
598  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
# Line 418  return ((value & 0x000000ff) << 24) | Line 605  return ((value & 0x000000ff) << 24) |
605    
606    
607  /*************************************************  /*************************************************
608    *        Check match or recursion limit          *
609    *************************************************/
610    
611    static int
612    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
613      int start_offset, int options, int *use_offsets, int use_size_offsets,
614      int flag, unsigned long int *limit, int errnumber, const char *msg)
615    {
       /* Repeatedly calls pcre_exec() with different values stored in *limit to
       find the smallest value for which pcre_exec() no longer returns errnumber,
       and prints that value to outfile labelled with msg. flag is set in
       extra->flags for the duration of the search and cleared again afterwards.
       Presumably limit points at the limit field inside extra that flag
       enables - confirm against the callers. Returns the result of the last
       pcre_exec() call. */
616    int count;
       /* min: largest value seen to fail with errnumber; max: smallest value
       seen to succeed (-1 until one has been found); mid: value to try next. */
617    int min = 0;
618    int mid = 64;
619    int max = -1;
620    
621    extra->flags |= flag;
622    
623    for (;;)
624      {
625      *limit = mid;
626    
627      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
628        use_offsets, use_size_offsets);
629    
       /* Limit too small. With no upper bound yet, double the trial value;
       otherwise bisect between min and max, or step straight to max when it is
       the only value left. */
630      if (count == errnumber)
631        {
632        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
633        min = mid;
634        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
635        }
636    
       /* The limit was not hit: a match, no match, or a partial match. */
637      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
638                             count == PCRE_ERROR_PARTIAL)
639        {
       /* mid succeeded and mid - 1 is known to fail, so mid is the minimum. */
640        if (mid == min + 1)
641          {
642          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
643          break;
644          }
645        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
       /* Otherwise tighten the upper bound and bisect downwards. */
646        max = mid;
647        mid = (min + mid)/2;
648        }
649      else break;    /* Some other error */
650      }
651    
652    extra->flags &= ~flag;
653    return count;
654    }
655    
656    
657    
658    /*************************************************
659    *         Check newline indicator                *
660    *************************************************/
661    
662    /* This is used both at compile and run-time to check for <xxx> escapes, where
663    xxx is lf, cr, crlf, or any (in lower case). Print a message and return 0 if there is no match.
664    
665    Arguments:
666      p           points after the leading '<'
667      f           file for error message
668    
669    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
670    */
671    
672    static int
673    check_newline(uschar *p, FILE *f)
674    {
       /* Each comparison is case-sensitive and includes the closing '>', so
       "cr>" cannot be mistaken for the start of "crlf>". */
675    if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
676    if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
677    if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
678    if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
       /* Nothing matched: echo the text after the '<' to f and return 0. */
679    fprintf(f, "Unknown newline type at: <%s\n", p);
680    return 0;
681    }
682    
683    
684    
685    /*************************************************
686    *             Usage function                     *
687    *************************************************/
688    
689    static void
690    usage(void)
691    {
       /* Prints the command-line summary to stdout. The -dfa and -p lines are
       printed only when NODFA and NOPOSIX respectively are not defined. */
692    printf("Usage:     pcretest [options] [<input> [<output>]]\n");
693    printf("  -b       show compiled code (bytecode)\n");
694    printf("  -C       show PCRE compile-time options and exit\n");
695    printf("  -d       debug: show compiled code and information (-b and -i)\n");
696    #if !defined NODFA
697    printf("  -dfa     force DFA matching for all subjects\n");
698    #endif
699    printf("  -help    show usage information\n");
700    printf("  -i       show information about compiled patterns\n"
701           "  -m       output memory used information\n"
702           "  -o <n>   set size of offsets vector to <n>\n");
703    #if !defined NOPOSIX
704    printf("  -p       use POSIX interface\n");
705    #endif
706    printf("  -q       quiet: do not output PCRE version number at start\n");
707    printf("  -S <n>   set stack size to <n> megabytes\n");
708    printf("  -s       output store (memory) used information\n"
709           "  -t       time compilation and execution\n");
710    printf("  -t <n>   time compilation and execution, repeating <n> times\n");
711    printf("  -tm      time execution (matching) only\n");
712    printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
713    }
714    
715    
716    
717    /*************************************************
718  *                Main Program                    *  *                Main Program                    *
719  *************************************************/  *************************************************/
720    
# Line 432  int options = 0; Line 729  int options = 0;
729  int study_options = 0;  int study_options = 0;
730  int op = 1;  int op = 1;
731  int timeit = 0;  int timeit = 0;
732    int timeitm = 0;
733  int showinfo = 0;  int showinfo = 0;
734  int showstore = 0;  int showstore = 0;
735    int quiet = 0;
736  int size_offsets = 45;  int size_offsets = 45;
737  int size_offsets_max;  int size_offsets_max;
738  int *offsets = NULL;  int *offsets = NULL;
# Line 444  int debug = 0; Line 743  int debug = 0;
743  int done = 0;  int done = 0;
744  int all_use_dfa = 0;  int all_use_dfa = 0;
745  int yield = 0;  int yield = 0;
746    int stack_size;
747    
748  unsigned char *buffer;  /* These vectors store, end-to-end, a list of captured substring names. Assume
749  unsigned char *dbuffer;  that 1024 is plenty long enough for the few names we'll be testing. */
750    
751    uschar copynames[1024];
752    uschar getnames[1024];
753    
754    uschar *copynamesptr;
755    uschar *getnamesptr;
756    
757  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that Electric Fence will check their misuse
758  when I am debugging. */  when I am debugging. They grow automatically when very long lines are read. */
759    
760  buffer = (unsigned char *)malloc(BUFFER_SIZE);  buffer = (unsigned char *)malloc(buffer_size);
761  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);  dbuffer = (unsigned char *)malloc(buffer_size);
762  pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);  pbuffer = (unsigned char *)malloc(buffer_size);
   
 /* The outfile variable is static so that new_malloc can use it. The _setmode()  
 stuff is some magic that I don't understand, but which apparently does good  
 things in Windows. It's related to line terminations.  */  
763    
764  #if defined(_WIN32) || defined(WIN32)  /* The outfile variable is static so that new_malloc can use it. */
 _setmode( _fileno( stdout ), 0x8000 );  
 #endif  /* defined(_WIN32) || defined(WIN32) */  
765    
766  outfile = stdout;  outfile = stdout;
767    
768    /* The following  _setmode() stuff is some Windows magic that tells its runtime
769    library to translate CRLF into a single LF character. At least, that's what
770    I've been told: never having used Windows I take this all on trust. Originally
771    it set 0x8000, but then I was advised that _O_BINARY was better. */
772    
773    #if defined(_WIN32) || defined(WIN32)
774    _setmode( _fileno( stdout ), _O_BINARY );
775    #endif
776    
777  /* Scan options */  /* Scan options */
778    
779  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
# Line 473  while (argc > 1 && argv[op][0] == '-') Line 782  while (argc > 1 && argv[op][0] == '-')
782    
783    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
784      showstore = 1;      showstore = 1;
785    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
786      else if (strcmp(argv[op], "-b") == 0) debug = 1;
787    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
788    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
789  #if !defined NODFA  #if !defined NODFA
# Line 486  while (argc > 1 && argv[op][0] == '-') Line 796  while (argc > 1 && argv[op][0] == '-')
796      op++;      op++;
797      argc--;      argc--;
798      }      }
799      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
800        {
801        int both = argv[op][2] == 0;
802        int temp;
803        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
804                         *endptr == 0))
805          {
806          timeitm = temp;
807          op++;
808          argc--;
809          }
810        else timeitm = LOOPREPEAT;
811        if (both) timeit = timeitm;
812        }
813      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
814          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
815            *endptr == 0))
816        {
817    #if defined(_WIN32) || defined(WIN32)
818        printf("PCRE: -S not supported on this OS\n");
819        exit(1);
820    #else
821        int rc;
822        struct rlimit rlim;
823        getrlimit(RLIMIT_STACK, &rlim);
824        rlim.rlim_cur = stack_size * 1024 * 1024;
825        rc = setrlimit(RLIMIT_STACK, &rlim);
826        if (rc != 0)
827          {
828        printf("PCRE: setrlimit() failed with error %d\n", rc);
829        exit(1);
830          }
831        op++;
832        argc--;
833    #endif
834        }
835  #if !defined NOPOSIX  #if !defined NOPOSIX
836    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
837  #endif  #endif
# Line 499  while (argc > 1 && argv[op][0] == '-') Line 845  while (argc > 1 && argv[op][0] == '-')
845      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
846      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
847      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
848      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
849          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
850          (rc == -1)? "ANY" : "???");
851      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
852      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
853      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
854      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
855      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
856      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %d\n", rc);
857        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
858        printf("  Default recursion depth limit = %d\n", rc);
859      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
860      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
861      exit(0);      exit(0);
862      }      }
863      else if (strcmp(argv[op], "-help") == 0 ||
864               strcmp(argv[op], "--help") == 0)
865        {
866        usage();
867        goto EXIT;
868        }
869    else    else
870      {      {
871      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
872      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
     printf("  -C     show PCRE compile-time options and exit\n");  
     printf("  -d     debug: show compiled code; implies -i\n");  
 #if !defined NODFA  
     printf("  -dfa   force DFA matching for all subjects\n");  
 #endif  
     printf("  -i     show information about compiled pattern\n"  
            "  -m     output memory used information\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store (memory) used information\n"  
            "  -t     time compilation and execution\n");  
873      yield = 1;      yield = 1;
874      goto EXIT;      goto EXIT;
875      }      }
# Line 550  if (offsets == NULL) Line 893  if (offsets == NULL)
893    
894  if (argc > 1)  if (argc > 1)
895    {    {
896    infile = fopen(argv[op], "rb");    infile = fopen(argv[op], INPUT_MODE);
897    if (infile == NULL)    if (infile == NULL)
898      {      {
899      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
# Line 561  if (argc > 1) Line 904  if (argc > 1)
904    
905  if (argc > 2)  if (argc > 2)
906    {    {
907    outfile = fopen(argv[op+1], "wb");    outfile = fopen(argv[op+1], OUTPUT_MODE);
908    if (outfile == NULL)    if (outfile == NULL)
909      {      {
910      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
# Line 577  pcre_free = new_free; Line 920  pcre_free = new_free;
920  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
921  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
922    
923  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
924    
925  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
926    
927  /* Main loop */  /* Main loop */
928    
# Line 606  while (!done) Line 949  while (!done)
949    int do_showinfo = showinfo;    int do_showinfo = showinfo;
950    int do_showrest = 0;    int do_showrest = 0;
951    int do_flip = 0;    int do_flip = 0;
952    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
953    
954    use_utf8 = 0;    use_utf8 = 0;
955    
956    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
957    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
958    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
959    fflush(outfile);    fflush(outfile);
960    
# Line 623  while (!done) Line 966  while (!done)
966    
967    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
968      {      {
969      unsigned long int magic;      unsigned long int magic, get_options;
970      uschar sbuf[8];      uschar sbuf[8];
971      FILE *f;      FILE *f;
972    
# Line 671  while (!done) Line 1014  while (!done)
1014    
1015      /* Need to know if UTF-8 for printing data strings */      /* Need to know if UTF-8 for printing data strings */
1016    
1017      new_info(re, NULL, PCRE_INFO_OPTIONS, &options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1018      use_utf8 = (options & PCRE_UTF8) != 0;      use_utf8 = (get_options & PCRE_UTF8) != 0;
1019    
1020      /* Now see if there is any following study data */      /* Now see if there is any following study data */
1021    
# Line 716  while (!done) Line 1059  while (!done)
1059      }      }
1060    
1061    pp = p;    pp = p;
1062      poffset = p - buffer;
1063    
1064    for(;;)    for(;;)
1065      {      {
# Line 726  while (!done) Line 1070  while (!done)
1070        pp++;        pp++;
1071        }        }
1072      if (*pp != 0) break;      if (*pp != 0) break;
   
     len = BUFFER_SIZE - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
1073      if (infile == stdin) printf("    > ");      if (infile == stdin) printf("    > ");
1074      if (fgets((char *)pp, len, infile) == NULL)      if ((pp = extend_inputline(infile, pp)) == NULL)
1075        {        {
1076        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1077        done = 1;        done = 1;
# Line 744  while (!done) Line 1080  while (!done)
1080      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1081      }      }
1082    
1083      /* The buffer may have moved while being extended; reset the start of data
1084      pointer to the correct relative point in the buffer. */
1085    
1086      p = buffer + poffset;
1087    
1088    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1089    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1090    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
# Line 775  while (!done) Line 1116  while (!done)
1116    
1117        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1118        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1119          case 'B': do_debug = 1; break;
1120        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
1121        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1122        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1123        case 'F': do_flip = 1; break;        case 'F': do_flip = 1; break;
1124        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1125        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1126          case 'J': options |= PCRE_DUPNAMES; break;
1127        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1128        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1129    
# Line 796  while (!done) Line 1139  while (!done)
1139    
1140        case 'L':        case 'L':
1141        ppp = pp;        ppp = pp;
1142        /* The '\r' test here is so that it works on Windows */        /* The '\r' test here is so that it works on Windows. */
1143        while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;        /* The '0' test is just in case this is an unterminated line. */
1144          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1145        *ppp = 0;        *ppp = 0;
1146        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1147          {          {
1148          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1149          goto SKIP_DATA;          goto SKIP_DATA;
1150          }          }
1151          locale_set = 1;
1152        tables = pcre_maketables();        tables = pcre_maketables();
1153        pp = ppp;        pp = ppp;
1154        break;        break;
# Line 815  while (!done) Line 1160  while (!done)
1160        *pp = 0;        *pp = 0;
1161        break;        break;
1162    
1163          case '<':
1164            {
1165            int x = check_newline(pp, outfile);
1166            if (x == 0) goto SKIP_DATA;
1167            options |= x;
1168            while (*pp++ != '>');
1169            }
1170          break;
1171    
1172        case '\r':                      /* So that it works in Windows */        case '\r':                      /* So that it works in Windows */
1173        case '\n':        case '\n':
1174        case ' ':        case ' ':
# Line 839  while (!done) Line 1193  while (!done)
1193      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1194      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1195      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1196        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1197        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1198    
1199      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1200    
1201      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 846  while (!done) Line 1203  while (!done)
1203    
1204      if (rc != 0)      if (rc != 0)
1205        {        {
1206        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1207        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1208        goto SKIP_DATA;        goto SKIP_DATA;
1209        }        }
# Line 858  while (!done) Line 1215  while (!done)
1215  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1216    
1217      {      {
1218      if (timeit)      if (timeit > 0)
1219        {        {
1220        register int i;        register int i;
1221        clock_t time_taken;        clock_t time_taken;
1222        clock_t start_time = clock();        clock_t start_time = clock();
1223        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1224          {          {
1225          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1226          if (re != NULL) free(re);          if (re != NULL) free(re);
1227          }          }
1228        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1229        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1230          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1231            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1232        }        }
1233    
# Line 887  while (!done) Line 1244  while (!done)
1244          {          {
1245          for (;;)          for (;;)
1246            {            {
1247            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)            if (extend_inputline(infile, buffer) == NULL)
1248              {              {
1249              done = 1;              done = 1;
1250              goto CONTINUE;              goto CONTINUE;
# Line 922  while (!done) Line 1279  while (!done)
1279    
1280      if (do_study)      if (do_study)
1281        {        {
1282        if (timeit)        if (timeit > 0)
1283          {          {
1284          register int i;          register int i;
1285          clock_t time_taken;          clock_t time_taken;
1286          clock_t start_time = clock();          clock_t start_time = clock();
1287          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
1288            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
1289          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1290          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
1291          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
1292            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeit) /
1293              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
1294          }          }
1295        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 975  while (!done) Line 1332  while (!done)
1332    
1333      SHOW_INFO:      SHOW_INFO:
1334    
1335        if (do_debug)
1336          {
1337          fprintf(outfile, "------------------------------------------------------------------\n");
1338          pcre_printint(re, outfile);
1339          }
1340    
1341      if (do_showinfo)      if (do_showinfo)
1342        {        {
1343        unsigned long int get_options, all_options;        unsigned long int get_options, all_options;
# Line 985  while (!done) Line 1348  while (!done)
1348        int nameentrysize, namecount;        int nameentrysize, namecount;
1349        const uschar *nametable;        const uschar *nametable;
1350    
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         _pcre_printint(re, outfile);  
         }  
   
1351        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1352        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1353        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
# Line 1048  while (!done) Line 1405  while (!done)
1405        if (do_flip)        if (do_flip)
1406          {          {
1407          all_options = byteflip(all_options, sizeof(all_options));          all_options = byteflip(all_options, sizeof(all_options));
1408          }           }
1409    
1410        if ((all_options & PCRE_NOPARTIAL) != 0)        if ((all_options & PCRE_NOPARTIAL) != 0)
1411          fprintf(outfile, "Partial matching not supported\n");          fprintf(outfile, "Partial matching not supported\n");
1412    
1413        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1414          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1415            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1416            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1417            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 1064  while (!done) Line 1421  while (!done)
1421            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1422            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1423            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1424              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1425            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1426            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1427              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1428    
1429        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        switch (get_options & PCRE_NEWLINE_BITS)
1430          fprintf(outfile, "Case state changes\n");          {
1431            case PCRE_NEWLINE_CR:
1432            fprintf(outfile, "Forced newline sequence: CR\n");
1433            break;
1434    
1435            case PCRE_NEWLINE_LF:
1436            fprintf(outfile, "Forced newline sequence: LF\n");
1437            break;
1438    
1439            case PCRE_NEWLINE_CRLF:
1440            fprintf(outfile, "Forced newline sequence: CRLF\n");
1441            break;
1442    
1443            case PCRE_NEWLINE_ANY:
1444            fprintf(outfile, "Forced newline sequence: ANY\n");
1445            break;
1446    
1447            default:
1448            break;
1449            }
1450    
1451        if (first_char == -1)        if (first_char == -1)
1452          {          {
1453          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1454          }          }
1455        else if (first_char < 0)        else if (first_char < 0)
1456          {          {
# Line 1083  while (!done) Line 1461  while (!done)
1461          int ch = first_char & 255;          int ch = first_char & 255;
1462          const char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1463            "" : " (caseless)";            "" : " (caseless)";
1464          if (isprint(ch))          if (PRINTHEX(ch))
1465            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1466          else          else
1467            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 1098  while (!done) Line 1476  while (!done)
1476          int ch = need_char & 255;          int ch = need_char & 255;
1477          const char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1478            "" : " (caseless)";            "" : " (caseless)";
1479          if (isprint(ch))          if (PRINTHEX(ch))
1480            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1481          else          else
1482            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
# Line 1134  while (!done) Line 1512  while (!done)
1512                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
1513                    c = 2;                    c = 2;
1514                    }                    }
1515                  if (isprint(i) && i != ' ')                  if (PRINTHEX(i) && i != ' ')
1516                    {                    {
1517                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
1518                    c += 2;                    c += 2;
# Line 1193  while (!done) Line 1571  while (!done)
1571                  strerror(errno));                  strerror(errno));
1572                }                }
1573              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
1574    
1575              }              }
1576            }            }
1577          fclose(f);          fclose(f);
# Line 1209  while (!done) Line 1588  while (!done)
1588    
1589    for (;;)    for (;;)
1590      {      {
1591      unsigned char *q;      uschar *q;
1592      unsigned char *bptr = dbuffer;      uschar *bptr = dbuffer;
1593      int *use_offsets = offsets;      int *use_offsets = offsets;
1594      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1595      int callout_data = 0;      int callout_data = 0;
# Line 1227  while (!done) Line 1606  while (!done)
1606    
1607      options = 0;      options = 0;
1608    
1609        *copynames = 0;
1610        *getnames = 0;
1611    
1612        copynamesptr = copynames;
1613        getnamesptr = getnames;
1614    
1615      pcre_callout = callout;      pcre_callout = callout;
1616      first_callout = 1;      first_callout = 1;
1617      callout_extra = 0;      callout_extra = 0;
# Line 1235  while (!done) Line 1620  while (!done)
1620      callout_fail_id = -1;      callout_fail_id = -1;
1621      show_malloc = 0;      show_malloc = 0;
1622    
1623      if (infile == stdin) printf("data> ");      if (extra != NULL) extra->flags &=
1624      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1625    
1626        len = 0;
1627        for (;;)
1628        {        {
1629        done = 1;        if (infile == stdin) printf("data> ");
1630        goto CONTINUE;        if (extend_inputline(infile, buffer + len) == NULL)
1631            {
1632            if (len > 0) break;
1633            done = 1;
1634            goto CONTINUE;
1635            }
1636          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1637          len = (int)strlen((char *)buffer);
1638          if (buffer[len-1] == '\n') break;
1639        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1640    
     len = (int)strlen((char *)buffer);  
1641      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1642      buffer[len] = 0;      buffer[len] = 0;
1643      if (len == 0) break;      if (len == 0) break;
# Line 1273  while (!done) Line 1667  while (!done)
1667          c -= '0';          c -= '0';
1668          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1669            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1670    
1671    #if !defined NOUTF8
1672            if (use_utf8 && c > 255)
1673              {
1674              unsigned char buff8[8];
1675              int ii, utn;
1676              utn = ord2utf8(c, buff8);
1677              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1678              c = buff8[ii];   /* Last byte */
1679              }
1680    #endif
1681          break;          break;
1682    
1683          case 'x':          case 'x':
# Line 1290  while (!done) Line 1695  while (!done)
1695              {              {
1696              unsigned char buff8[8];              unsigned char buff8[8];
1697              int ii, utn;              int ii, utn;
1698              utn = _pcre_ord2utf8(c, buff8);              utn = ord2utf8(c, buff8);
1699              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1700              c = buff8[ii];   /* Last byte */              c = buff8[ii];   /* Last byte */
1701              p = pt + 1;              p = pt + 1;
# Line 1334  while (!done) Line 1739  while (!done)
1739            }            }
1740          else if (isalnum(*p))          else if (isalnum(*p))
1741            {            {
1742            uschar name[256];            uschar *npp = copynamesptr;
           uschar *npp = name;  
1743            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1744              *npp++ = 0;
1745            *npp = 0;            *npp = 0;
1746            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
1747            if (n < 0)            if (n < 0)
1748              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1749            else copystrings |= 1 << n;            copynamesptr = npp;
1750            }            }
1751          else if (*p == '+')          else if (*p == '+')
1752            {            {
# Line 1402  while (!done) Line 1807  while (!done)
1807            }            }
1808          else if (isalnum(*p))          else if (isalnum(*p))
1809            {            {
1810            uschar name[256];            uschar *npp = getnamesptr;
           uschar *npp = name;  
1811            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1812              *npp++ = 0;
1813            *npp = 0;            *npp = 0;
1814            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
1815            if (n < 0)            if (n < 0)
1816              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1817            else getstrings |= 1 << n;            getnamesptr = npp;
1818            }            }
1819          continue;          continue;
1820    
# Line 1448  while (!done) Line 1853  while (!done)
1853          options |= PCRE_PARTIAL;          options |= PCRE_PARTIAL;
1854          continue;          continue;
1855    
1856            case 'Q':
1857            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1858            if (extra == NULL)
1859              {
1860              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1861              extra->flags = 0;
1862              }
1863            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1864            extra->match_limit_recursion = n;
1865            continue;
1866    
1867            case 'q':
1868            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1869            if (extra == NULL)
1870              {
1871              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1872              extra->flags = 0;
1873              }
1874            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1875            extra->match_limit = n;
1876            continue;
1877    
1878  #if !defined NODFA  #if !defined NODFA
1879          case 'R':          case 'R':
1880          options |= PCRE_DFA_RESTART;          options |= PCRE_DFA_RESTART;
# Line 1465  while (!done) Line 1892  while (!done)
1892          case '?':          case '?':
1893          options |= PCRE_NO_UTF8_CHECK;          options |= PCRE_NO_UTF8_CHECK;
1894          continue;          continue;
1895    
1896            case '<':
1897              {
1898              int x = check_newline(p, outfile);
1899              if (x == 0) goto NEXT_DATA;
1900              options |= x;
1901              while (*p++ != '>');
1902              }
1903            continue;
1904          }          }
1905        *q++ = c;        *q++ = c;
1906        }        }
# Line 1495  while (!done) Line 1931  while (!done)
1931    
1932        if (rc != 0)        if (rc != 0)
1933          {          {
1934          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1935          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1936          }          }
1937          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1938                  != 0)
1939            {
1940            fprintf(outfile, "Matched with REG_NOSUB\n");
1941            }
1942        else        else
1943          {          {
1944          size_t i;          size_t i;
# Line 1529  while (!done) Line 1970  while (!done)
1970    
1971      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
1972        {        {
1973        if (timeit)        if (timeitm > 0)
1974          {          {
1975          register int i;          register int i;
1976          clock_t time_taken;          clock_t time_taken;
# Line 1539  while (!done) Line 1980  while (!done)
1980          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
1981            {            {
1982            int workspace[1000];            int workspace[1000];
1983            for (i = 0; i < LOOPREPEAT; i++)            for (i = 0; i < timeitm; i++)
1984              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1985                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
1986                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
# Line 1547  while (!done) Line 1988  while (!done)
1988          else          else
1989  #endif  #endif
1990    
1991          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeitm; i++)
1992            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1993              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1994    
1995          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1996          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
1997            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
1998              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
1999          }          }
2000    
2001        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
2002        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
2003          for the recursion limit. */
2004    
2005        if (find_match_limit)        if (find_match_limit)
2006          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
2007          if (extra == NULL)          if (extra == NULL)
2008            {            {
2009            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2010            extra->flags = 0;            extra->flags = 0;
2011            }            }
         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;  
   
         for (;;)  
           {  
           extra->match_limit = mid;  
           count = pcre_exec(re, extra, (char *)bptr, len, start_offset,  
             options | g_notempty, use_offsets, use_size_offsets);  
           if (count == PCRE_ERROR_MATCHLIMIT)  
             {  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             min = mid;  
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||  
                                  count == PCRE_ERROR_PARTIAL)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
2012    
2013          extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;          (void)check_match_limit(re, extra, bptr, len, start_offset,
2014              options|g_notempty, use_offsets, use_size_offsets,
2015              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2016              PCRE_ERROR_MATCHLIMIT, "match()");
2017    
2018            count = check_match_limit(re, extra, bptr, len, start_offset,
2019              options|g_notempty, use_offsets, use_size_offsets,
2020              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2021              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2022          }          }
2023    
2024        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1651  while (!done) Line 2070  while (!done)
2070    
2071        if (count >= 0)        if (count >= 0)
2072          {          {
2073          int i;          int i, maxcount;
2074    
2075    #if !defined NODFA
2076            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2077    #endif
2078              maxcount = use_size_offsets/3;
2079    
2080            /* This is a check against a lunatic return value. */
2081    
2082            if (count > maxcount)
2083              {
2084              fprintf(outfile,
2085                "** PCRE error: returned count %d is too big for offset size %d\n",
2086                count, use_size_offsets);
2087              count = use_size_offsets/3;
2088              if (do_g || do_G)
2089                {
2090                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2091                do_g = do_G = FALSE;        /* Break g/G loop */
2092                }
2093              }
2094    
2095          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2096            {            {
2097            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 1679  while (!done) Line 2119  while (!done)
2119            {            {
2120            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2121              {              {
2122              char copybuffer[16];              char copybuffer[256];
2123              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2124                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2125              if (rc < 0)              if (rc < 0)
# Line 1689  while (!done) Line 2129  while (!done)
2129              }              }
2130            }            }
2131    
2132            for (copynamesptr = copynames;
2133                 *copynamesptr != 0;
2134                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2135              {
2136              char copybuffer[256];
2137              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2138                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2139              if (rc < 0)
2140                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2141              else
2142                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2143              }
2144    
2145          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2146            {            {
2147            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1701  while (!done) Line 2154  while (!done)
2154              else              else
2155                {                {
2156                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2157                pcre_free_substring(substring);                pcre_free_substring(substring);
2158                }                }
2159              }              }
2160            }            }
2161    
2162            for (getnamesptr = getnames;
2163                 *getnamesptr != 0;
2164                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2165              {
2166              const char *substring;
2167              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2168                count, (char *)getnamesptr, &substring);
2169              if (rc < 0)
2170                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2171              else
2172                {
2173                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2174                pcre_free_substring(substring);
2175                }
2176              }
2177    
2178          if (getlist)          if (getlist)
2179            {            {
2180            const char **stringlist;            const char **stringlist;
# Line 1806  while (!done) Line 2274  while (!done)
2274          len -= use_offsets[1];          len -= use_offsets[1];
2275          }          }
2276        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2277    
2278        NEXT_DATA: continue;
2279      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2280    
2281    CONTINUE:    CONTINUE:
# Line 1820  while (!done) Line 2290  while (!done)
2290      {      {
2291      new_free((void *)tables);      new_free((void *)tables);
2292      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2293        locale_set = 0;
2294      }      }
2295    }    }
2296    

Legend:
Removed from v.79  
changed lines
  Added in v.107

  ViewVC Help
Powered by ViewVC 1.1.5