/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 75 by nigel, Sat Feb 24 21:40:37 2007 UTC revision 388 by ph10, Wed Mar 11 17:03:17 2009 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather untidy in places.  been extended and consequently is now rather, er, *very* untidy in places.
8    
9  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
10  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 48  POSSIBILITY OF SUCH DAMAGE.
48  #include <locale.h>  #include <locale.h>
49  #include <errno.h>  #include <errno.h>
50    
51  /* We need the internal info for displaying the results of pcre_study(). Also  #ifdef SUPPORT_LIBREADLINE
52  for getting the opcodes for showing compiled code. */  #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60    /* A number of things vary for Windows builds. Originally, pcretest opened its
61    input and output without "b"; then I was told that "b" was needed in some
62    environments, so it was added for release 5.0 to both the input and output. (It
63    makes no difference on Unix-like systems.) Later I was told that it is wrong
64    for the input on Windows. I've now abstracted the modes into two macros that
65    are set here, to make it easier to fiddle with them, and removed "b" from the
66    input mode under Windows. */
67    
68    #if defined(_WIN32) || defined(WIN32)
69    #include <io.h>                /* For _setmode() */
70    #include <fcntl.h>             /* For _O_BINARY */
71    #define INPUT_MODE   "r"
72    #define OUTPUT_MODE  "wb"
73    
74    #define isatty _isatty         /* This is what Windows calls them, I'm told */
75    #define fileno _fileno
76    
77    #else
78    #include <sys/time.h>          /* These two includes are needed */
79    #include <sys/resource.h>      /* for setrlimit(). */
80    #define INPUT_MODE   "rb"
81    #define OUTPUT_MODE  "wb"
82    #endif
83    
84    
85    /* We have to include pcre_internal.h because we need the internal info for
86    displaying the results of pcre_study() and we also need to know about the
87    internal macros, structures, and other internal data values; pcretest has
88    "inside information" compared to a program that strictly follows the PCRE API.
89    
90    Although pcre_internal.h does itself include pcre.h, we explicitly include it
91    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
92    appropriately for an application, not for building PCRE. */
93    
94    #include "pcre.h"
95    #include "pcre_internal.h"
96    
97    /* We need access to some of the data tables that PCRE uses. So as not to have
98    to keep two copies, we include the source file here, changing the names of the
99    external symbols to prevent clashes. */
100    
101    #define _pcre_ucp_gentype      ucp_gentype
102    #define _pcre_utf8_table1      utf8_table1
103    #define _pcre_utf8_table1_size utf8_table1_size
104    #define _pcre_utf8_table2      utf8_table2
105    #define _pcre_utf8_table3      utf8_table3
106    #define _pcre_utf8_table4      utf8_table4
107    #define _pcre_utt              utt
108    #define _pcre_utt_size         utt_size
109    #define _pcre_utt_names        utt_names
110    #define _pcre_OP_lengths       OP_lengths
111    
112    #include "pcre_tables.c"
113    
114    /* We also need the pcre_printint() function for printing out compiled
115    patterns. This function is in a separate file so that it can be included in
116    pcre_compile.c when that module is compiled with debugging enabled.
117    
118    The definition of the macro PRINTABLE, which determines whether to print an
119    output character as-is or as a hex value when showing compiled patterns, is
120    contained in this file. We use it here also, in cases when the locale has not
121    been explicitly changed, so as to get consistent output from systems that
122    differ in their output from isprint() even in the "C" locale. */
123    
124    #include "pcre_printint.src"
125    
126    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
127    
 #define PCRE_SPY        /* For Win32 build, import data, not export */  
 #include "internal.h"  
128    
129  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
130  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 58  Makefile. */ Line 134  Makefile. */
134  #include "pcreposix.h"  #include "pcreposix.h"
135  #endif  #endif
136    
137    /* It is also possible, for the benefit of the version currently imported into
138    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
139    interface to the DFA matcher (NODFA), and without the doublecheck of the old
140    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
141    UTF8 support if PCRE is built without it. */
142    
143    #ifndef SUPPORT_UTF8
144    #ifndef NOUTF8
145    #define NOUTF8
146    #endif
147    #endif
148    
149    
150    /* Other parameters */
151    
152  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
153  #ifdef CLK_TCK  #ifdef CLK_TCK
154  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 66  Makefile. */ Line 157  Makefile. */
157  #endif  #endif
158  #endif  #endif
159    
160  #define LOOPREPEAT 500000  /* This is the default loop count for timing. */
161    
162  #define BUFFER_SIZE 30000  #define LOOPREPEAT 500000
 #define PBUFFER_SIZE BUFFER_SIZE  
 #define DBUFFER_SIZE BUFFER_SIZE  
163    
164    /* Static variables */
165    
166  static FILE *outfile;  static FILE *outfile;
167  static int log_store = 0;  static int log_store = 0;
# Line 79  static int callout_count; Line 169  static int callout_count;
169  static int callout_extra;  static int callout_extra;
170  static int callout_fail_count;  static int callout_fail_count;
171  static int callout_fail_id;  static int callout_fail_id;
172    static int debug_lengths;
173  static int first_callout;  static int first_callout;
174    static int locale_set = 0;
175  static int show_malloc;  static int show_malloc;
176  static int use_utf8;  static int use_utf8;
177  static size_t gotten_store;  static size_t gotten_store;
178    
179    /* The buffers grow automatically if very long input lines are encountered. */
180    
181    static int buffer_size = 50000;
182    static uschar *buffer = NULL;
183    static uschar *dbuffer = NULL;
184  static uschar *pbuffer = NULL;  static uschar *pbuffer = NULL;
185    
186    
 static const int utf8_table1[] = {  
   0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  
187    
188  static const int utf8_table2[] = {  /*************************************************
189    0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  *        Read or extend an input line            *
190    *************************************************/
191    
192  static const int utf8_table3[] = {  /* Input lines are read into buffer, but both patterns and data lines can be
193    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  continued over multiple input lines. In addition, if the buffer fills up, we
194    want to automatically expand it so as to be able to handle extremely large
195    lines that are needed for certain stress tests. When the input buffer is
196    expanded, the other two buffers must also be expanded likewise, and the
197    contents of pbuffer, which are a copy of the input for callouts, must be
198    preserved (for when expansion happens for a data line). This is not the most
199    optimal way of handling this, but hey, this is just a test program!
200    
201    Arguments:
202      f            the file to read
203      start        where in buffer to start (this *must* be within buffer)
204      prompt       for stdin or readline()
205    
206    Returns:       pointer to the start of new data
207                   could be a copy of start, or could be moved
208                   NULL if no data read and EOF reached
209    */
210    
211    static uschar *
212    extend_inputline(FILE *f, uschar *start, const char *prompt)
213    {
214    uschar *here = start;
215    
216    for (;;)
217      {
218      int rlen = buffer_size - (here - buffer);
219    
220      if (rlen > 1000)
221        {
222        int dlen;
223    
224        /* If libreadline support is required, use readline() to read a line if the
225        input is a terminal. Note that readline() removes the trailing newline, so
226        we must put it back again, to be compatible with fgets(). */
227    
228    #ifdef SUPPORT_LIBREADLINE
229        if (isatty(fileno(f)))
230          {
231          size_t len;
232          char *s = readline(prompt);
233          if (s == NULL) return (here == start)? NULL : start;
234          len = strlen(s);
235          if (len > 0) add_history(s);
236          if (len > rlen - 1) len = rlen - 1;
237          memcpy(here, s, len);
238          here[len] = '\n';
239          here[len+1] = 0;
240          free(s);
241          }
242        else
243    #endif
244    
245        /* Read the next line by normal means, prompting if the file is stdin. */
246    
247          {
248          if (f == stdin) printf(prompt);
249          if (fgets((char *)here, rlen,  f) == NULL)
250            return (here == start)? NULL : start;
251          }
252    
253        dlen = (int)strlen((char *)here);
254        if (dlen > 0 && here[dlen - 1] == '\n') return start;
255        here += dlen;
256        }
257    
258      else
259        {
260        int new_buffer_size = 2*buffer_size;
261        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
262        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
263        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
264    
265        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
266          {
267          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
268          exit(1);
269          }
270    
271        memcpy(new_buffer, buffer, buffer_size);
272        memcpy(new_pbuffer, pbuffer, buffer_size);
273    
274        buffer_size = new_buffer_size;
275    
276        start = new_buffer + (start - buffer);
277        here = new_buffer + (here - buffer);
278    
279        free(buffer);
280        free(dbuffer);
281        free(pbuffer);
282    
283        buffer = new_buffer;
284        dbuffer = new_dbuffer;
285        pbuffer = new_pbuffer;
286        }
287      }
288    
289    return NULL;  /* Control never gets here */
290    }
291    
292    
293    
 /*************************************************  
 *         Print compiled regex                   *  
 *************************************************/  
294    
 /* The code for doing this is held in a separate file that is also included in  
 pcre.c when it is compiled with the debug switch. It defines a function called  
 print_internals(), which uses a table of opcode lengths defined by the macro  
 OP_LENGTHS, whose name must be OP_lengths. It also uses a table that translates  
 Unicode property names to numbers; this is kept in a separate file. */  
   
 static uschar OP_lengths[] = { OP_LENGTHS };  
   
 #include "ucp.h"  
 #include "ucptypetable.c"  
 #include "printint.c"  
295    
296    
297    
# Line 122  static uschar OP_lengths[] = { OP_LENGTH Line 301  static uschar OP_lengths[] = { OP_LENGTH
301    
302  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
303  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
304  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
305    
306  Arguments:  Arguments:
307    str           string to be converted    str           string to be converted
# Line 143  return(result); Line 322  return(result);
322    
323    
324    
 /*************************************************  
 *       Convert character value to UTF-8         *  
 *************************************************/  
   
 /* This function takes an integer value in the range 0 - 0x7fffffff  
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   buffer     pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
 */  
   
 static int  
 ord2utf8(int cvalue, unsigned char *buffer)  
 {  
 register int i, j;  
 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  
   if (cvalue <= utf8_table1[i]) break;  
 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  
 if (cvalue < 0) return -1;  
   
 buffer += i;  
 for (j = i; j > 0; j--)  
  {  
  *buffer-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *buffer = utf8_table2[i] | cvalue;  
 return i + 1;  
 }  
   
325    
326  /*************************************************  /*************************************************
327  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
# Line 188  return i + 1; Line 331  return i + 1;
331  and returns the value of the character.  and returns the value of the character.
332    
333  Argument:  Argument:
334    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
335    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
336    
337  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
338             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
339  */  */
340    
341    #if !defined NOUTF8
342    
343  static int  static int
344  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *utf8bytes, int *vptr)
345  {  {
346  int c = *buffer++;  int c = *utf8bytes++;
347  int d = c;  int d = c;
348  int i, j, s;  int i, j, s;
349    
# Line 218  d = (c & utf8_table3[i]) << s; Line 363  d = (c & utf8_table3[i]) << s;
363    
364  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
365    {    {
366    c = *buffer++;    c = *utf8bytes++;
367    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
368    s -= 6;    s -= 6;
369    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 226  for (j = 0; j < i; j++) Line 371  for (j = 0; j < i; j++)
371    
372  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
373    
374  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < utf8_table1_size; j++)
375    if (d <= utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
376  if (j != i) return -(i+1);  if (j != i) return -(i+1);
377    
# Line 236  if (j != i) return -(i+1); Line 381  if (j != i) return -(i+1);
381  return i+1;  return i+1;
382  }  }
383    
384    #endif
385    
386    
387    
388    /*************************************************
389    *       Convert character value to UTF-8         *
390    *************************************************/
391    
392    /* This function takes an integer value in the range 0 - 0x7fffffff
393    and encodes it as a UTF-8 character in 0 to 6 bytes.
394    
395    Arguments:
396      cvalue     the character value
397      utf8bytes  pointer to buffer for result - at least 6 bytes long
398    
399    Returns:     number of characters placed in the buffer
400    */
401    
402    #if !defined NOUTF8
403    
404    static int
405    ord2utf8(int cvalue, uschar *utf8bytes)
406    {
407    register int i, j;
408    for (i = 0; i < utf8_table1_size; i++)
409      if (cvalue <= utf8_table1[i]) break;
410    utf8bytes += i;
411    for (j = i; j > 0; j--)
412     {
413     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
414     cvalue >>= 6;
415     }
416    *utf8bytes = utf8_table2[i] | cvalue;
417    return i + 1;
418    }
419    
420    #endif
421    
422    
423    
424  /*************************************************  /*************************************************
# Line 248  chars without printing. */ Line 431  chars without printing. */
431    
432  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(unsigned char *p, int length, FILE *f)
433  {  {
434  int c;  int c = 0;
435  int yield = 0;  int yield = 0;
436    
437  while (length-- > 0)  while (length-- > 0)
438    {    {
439    #if !defined NOUTF8
440    if (use_utf8)    if (use_utf8)
441      {      {
442      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
# Line 261  while (length-- > 0) Line 445  while (length-- > 0)
445        {        {
446        length -= rc - 1;        length -= rc - 1;
447        p += rc;        p += rc;
448        if (c < 256 && isprint(c))        if (PRINTHEX(c))
449          {          {
450          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
451          yield++;          yield++;
452          }          }
453        else        else
454          {          {
455          int n;          int n = 4;
456          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
457          yield += n;          yield += (n <= 0x000000ff)? 2 :
458                     (n <= 0x00000fff)? 3 :
459                     (n <= 0x0000ffff)? 4 :
460                     (n <= 0x000fffff)? 5 : 6;
461          }          }
462        continue;        continue;
463        }        }
464      }      }
465    #endif
466    
467     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
468    
469    if (isprint(c = *(p++)))    c = *p++;
470      if (PRINTHEX(c))
471      {      {
472      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
473      yield++;      yield++;
# Line 403  static void *new_malloc(size_t size) Line 592  static void *new_malloc(size_t size)
592  void *block = malloc(size);  void *block = malloc(size);
593  gotten_store = size;  gotten_store = size;
594  if (show_malloc)  if (show_malloc)
595    fprintf(outfile, "malloc       %3d %p\n", size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
596  return block;  return block;
597  }  }
598    
# Line 421  static void *stack_malloc(size_t size) Line 610  static void *stack_malloc(size_t size)
610  {  {
611  void *block = malloc(size);  void *block = malloc(size);
612  if (show_malloc)  if (show_malloc)
613    fprintf(outfile, "stack_malloc %3d %p\n", size, block);    fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
614  return block;  return block;
615  }  }
616    
# Line 452  if ((rc = pcre_fullinfo(re, study, optio Line 641  if ((rc = pcre_fullinfo(re, study, optio
641  *         Byte flipping function                 *  *         Byte flipping function                 *
642  *************************************************/  *************************************************/
643    
644  static long int  static unsigned long int
645  byteflip(long int value, int n)  byteflip(unsigned long int value, int n)
646  {  {
647  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
648  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
# Line 466  return ((value & 0x000000ff) << 24) | Line 655  return ((value & 0x000000ff) << 24) |
655    
656    
657  /*************************************************  /*************************************************
658    *        Check match or recursion limit          *
659    *************************************************/
660    
661    static int
662    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
663      int start_offset, int options, int *use_offsets, int use_size_offsets,
664      int flag, unsigned long int *limit, int errnumber, const char *msg)
665    {
666    int count;
667    int min = 0;
668    int mid = 64;
669    int max = -1;
670    
671    extra->flags |= flag;
672    
673    for (;;)
674      {
675      *limit = mid;
676    
677      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
678        use_offsets, use_size_offsets);
679    
680      if (count == errnumber)
681        {
682        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
683        min = mid;
684        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
685        }
686    
687      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
688                             count == PCRE_ERROR_PARTIAL)
689        {
690        if (mid == min + 1)
691          {
692          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
693          break;
694          }
695        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
696        max = mid;
697        mid = (min + mid)/2;
698        }
699      else break;    /* Some other error */
700      }
701    
702    extra->flags &= ~flag;
703    return count;
704    }
705    
706    
707    
708    /*************************************************
709    *         Case-independent strncmp() function    *
710    *************************************************/
711    
712    /*
713    Arguments:
714      s         first string
715      t         second string
716      n         number of characters to compare
717    
718    Returns:    < 0, = 0, or > 0, according to the comparison
719    */
720    
721    static int
722    strncmpic(uschar *s, uschar *t, int n)
723    {
724    while (n--)
725      {
726      int c = tolower(*s++) - tolower(*t++);
727      if (c) return c;
728      }
729    return 0;
730    }
731    
732    
733    
734    /*************************************************
735    *         Check newline indicator                *
736    *************************************************/
737    
738    /* This is used both at compile and run-time to check for <xxx> escapes, where
739    xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
740    no match.
741    
742    Arguments:
743      p           points after the leading '<'
744      f           file for error message
745    
746    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
747    */
748    
749    static int
750    check_newline(uschar *p, FILE *f)
751    {
752    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
753    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
754    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
755    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
756    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
757    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
758    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
759    fprintf(f, "Unknown newline type at: <%s\n", p);
760    return 0;
761    }
762    
763    
764    
765    /*************************************************
766    *             Usage function                     *
767    *************************************************/
768    
/* Write the command-line help text to stdout. The lines describing readline
support, -dfa and -p depend on SUPPORT_LIBREADLINE, NODFA and NOPOSIX
respectively. None of the text contains a conversion, so it is written with
fputs(). */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);

#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif

fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -M       find MATCH_LIMIT minimum for each subject\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif

fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
802    
803    
804    
805    /*************************************************
806  *                Main Program                    *  *                Main Program                    *
807  *************************************************/  *************************************************/
808    
# Line 478  int main(int argc, char **argv) Line 815  int main(int argc, char **argv)
815  FILE *infile = stdin;  FILE *infile = stdin;
816  int options = 0;  int options = 0;
817  int study_options = 0;  int study_options = 0;
818    int default_find_match_limit = FALSE;
819  int op = 1;  int op = 1;
820  int timeit = 0;  int timeit = 0;
821    int timeitm = 0;
822  int showinfo = 0;  int showinfo = 0;
823  int showstore = 0;  int showstore = 0;
824    int quiet = 0;
825  int size_offsets = 45;  int size_offsets = 45;
826  int size_offsets_max;  int size_offsets_max;
827  int *offsets;  int *offsets = NULL;
828  #if !defined NOPOSIX  #if !defined NOPOSIX
829  int posix = 0;  int posix = 0;
830  #endif  #endif
831  int debug = 0;  int debug = 0;
832  int done = 0;  int done = 0;
833    int all_use_dfa = 0;
834    int yield = 0;
835    int stack_size;
836    
837  unsigned char *buffer;  /* These vectors store, end-to-end, a list of captured substring names. Assume
838  unsigned char *dbuffer;  that 1024 is plenty long enough for the few names we'll be testing. */
839    
840    uschar copynames[1024];
841    uschar getnames[1024];
842    
843    uschar *copynamesptr;
844    uschar *getnamesptr;
845    
846  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that Electric Fence will check their misuse
847  when I am debugging. */  when I am debugging. They grow automatically when very long lines are read. */
848    
849  buffer = (unsigned char *)malloc(BUFFER_SIZE);  buffer = (unsigned char *)malloc(buffer_size);
850  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);  dbuffer = (unsigned char *)malloc(buffer_size);
851  pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);  pbuffer = (unsigned char *)malloc(buffer_size);
   
 /* The outfile variable is static so that new_malloc can use it. The _setmode()  
 stuff is some magic that I don't understand, but which apparently does good  
 things in Windows. It's related to line terminations.  */  
852    
853  #if defined(_WIN32) || defined(WIN32)  /* The outfile variable is static so that new_malloc can use it. */
 _setmode( _fileno( stdout ), 0x8000 );  
 #endif  /* defined(_WIN32) || defined(WIN32) */  
854    
855  outfile = stdout;  outfile = stdout;
856    
857    /* The following  _setmode() stuff is some Windows magic that tells its runtime
858    library to translate CRLF into a single LF character. At least, that's what
859    I've been told: never having used Windows I take this all on trust. Originally
860    it set 0x8000, but then I was advised that _O_BINARY was better. */
861    
862    #if defined(_WIN32) || defined(WIN32)
863    _setmode( _fileno( stdout ), _O_BINARY );
864    #endif
865    
866  /* Scan options */  /* Scan options */
867    
868  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
# Line 519  while (argc > 1 && argv[op][0] == '-') Line 871  while (argc > 1 && argv[op][0] == '-')
871    
872    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
873      showstore = 1;      showstore = 1;
874    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
875      else if (strcmp(argv[op], "-b") == 0) debug = 1;
876    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
877    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
878      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
879    #if !defined NODFA
880      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
881    #endif
882    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
883        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
884          *endptr == 0))          *endptr == 0))
# Line 529  while (argc > 1 && argv[op][0] == '-') Line 886  while (argc > 1 && argv[op][0] == '-')
886      op++;      op++;
887      argc--;      argc--;
888      }      }
889      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
890        {
891        int both = argv[op][2] == 0;
892        int temp;
893        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
894                         *endptr == 0))
895          {
896          timeitm = temp;
897          op++;
898          argc--;
899          }
900        else timeitm = LOOPREPEAT;
901        if (both) timeit = timeitm;
902        }
903      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
904          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
905            *endptr == 0))
906        {
907    #if defined(_WIN32) || defined(WIN32)
908        printf("PCRE: -S not supported on this OS\n");
909        exit(1);
910    #else
911        int rc;
912        struct rlimit rlim;
913        getrlimit(RLIMIT_STACK, &rlim);
914        rlim.rlim_cur = stack_size * 1024 * 1024;
915        rc = setrlimit(RLIMIT_STACK, &rlim);
916        if (rc != 0)
917          {
918        printf("PCRE: setrlimit() failed with error %d\n", rc);
919        exit(1);
920          }
921        op++;
922        argc--;
923    #endif
924        }
925  #if !defined NOPOSIX  #if !defined NOPOSIX
926    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
927  #endif  #endif
928    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
929      {      {
930      int rc;      int rc;
931        unsigned long int lrc;
932      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
933      printf("Compiled with\n");      printf("Compiled with\n");
934      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
# Line 542  while (argc > 1 && argv[op][0] == '-') Line 936  while (argc > 1 && argv[op][0] == '-')
936      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
937      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
938      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
939      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
940          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
941          (rc == -2)? "ANYCRLF" :
942          (rc == -1)? "ANY" : "???");
943        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
944        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
945                                         "all Unicode newlines");
946      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
947      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
948      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
949      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
950      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
951      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
952        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
953        printf("  Default recursion depth limit = %ld\n", lrc);
954      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
955      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
956      exit(0);      goto EXIT;
957        }
958      else if (strcmp(argv[op], "-help") == 0 ||
959               strcmp(argv[op], "--help") == 0)
960        {
961        usage();
962        goto EXIT;
963      }      }
964    else    else
965      {      {
966      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
967      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
968      printf("  -C     show PCRE compile-time options and exit\n");      yield = 1;
969      printf("  -d     debug: show compiled code; implies -i\n"      goto EXIT;
            "  -i     show information about compiled pattern\n"  
            "  -m     output memory used information\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store (memory) used information\n"  
            "  -t     time compilation and execution\n");  
     return 1;  
970      }      }
971    op++;    op++;
972    argc--;    argc--;
# Line 580  offsets = (int *)malloc(size_offsets_max Line 979  offsets = (int *)malloc(size_offsets_max
979  if (offsets == NULL)  if (offsets == NULL)
980    {    {
981    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
982      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
983    return 1;    yield = 1;
984      goto EXIT;
985    }    }
986    
987  /* Sort out the input and output files */  /* Sort out the input and output files */
988    
989  if (argc > 1)  if (argc > 1)
990    {    {
991    infile = fopen(argv[op], "rb");    infile = fopen(argv[op], INPUT_MODE);
992    if (infile == NULL)    if (infile == NULL)
993      {      {
994      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
995      return 1;      yield = 1;
996        goto EXIT;
997      }      }
998    }    }
999    
1000  if (argc > 2)  if (argc > 2)
1001    {    {
1002    outfile = fopen(argv[op+1], "wb");    outfile = fopen(argv[op+1], OUTPUT_MODE);
1003    if (outfile == NULL)    if (outfile == NULL)
1004      {      {
1005      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
1006      return 1;      yield = 1;
1007        goto EXIT;
1008      }      }
1009    }    }
1010    
# Line 613  pcre_free = new_free; Line 1015  pcre_free = new_free;
1015  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
1016  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
1017    
1018  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1019    
1020  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1021    
1022  /* Main loop */  /* Main loop */
1023    
# Line 642  while (!done) Line 1044  while (!done)
1044    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1045    int do_showrest = 0;    int do_showrest = 0;
1046    int do_flip = 0;    int do_flip = 0;
1047    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
1048    
1049    use_utf8 = 0;    use_utf8 = 0;
1050      debug_lengths = 1;
1051    
1052    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;  
1053    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1054    fflush(outfile);    fflush(outfile);
1055    
# Line 659  while (!done) Line 1061  while (!done)
1061    
1062    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1063      {      {
1064      unsigned long int magic;      unsigned long int magic, get_options;
1065      uschar sbuf[8];      uschar sbuf[8];
1066      FILE *f;      FILE *f;
1067    
# Line 707  while (!done) Line 1109  while (!done)
1109    
1110      /* Need to know if UTF-8 for printing data strings */      /* Need to know if UTF-8 for printing data strings */
1111    
1112      new_info(re, NULL, PCRE_INFO_OPTIONS, &options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1113      use_utf8 = (options & PCRE_UTF8) != 0;      use_utf8 = (get_options & PCRE_UTF8) != 0;
1114    
1115      /* Now see if there is any following study data */      /* Now see if there is any following study data */
1116    
# Line 747  while (!done) Line 1149  while (!done)
1149    
1150    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1151      {      {
1152      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1153      goto SKIP_DATA;      goto SKIP_DATA;
1154      }      }
1155    
1156    pp = p;    pp = p;
1157      poffset = p - buffer;
1158    
1159    for(;;)    for(;;)
1160      {      {
# Line 762  while (!done) Line 1165  while (!done)
1165        pp++;        pp++;
1166        }        }
1167      if (*pp != 0) break;      if (*pp != 0) break;
1168        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = BUFFER_SIZE - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
1169        {        {
1170        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1171        done = 1;        done = 1;
# Line 780  while (!done) Line 1174  while (!done)
1174      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1175      }      }
1176    
1177      /* The buffer may have moved while being extended; reset the start of data
1178      pointer to the correct relative point in the buffer. */
1179    
1180      p = buffer + poffset;
1181    
1182    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1183    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1184    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
# Line 802  while (!done) Line 1201  while (!done)
1201      {      {
1202      switch (*pp++)      switch (*pp++)
1203        {        {
1204          case 'f': options |= PCRE_FIRSTLINE; break;
1205        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
1206        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1207        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 810  while (!done) Line 1210  while (!done)
1210    
1211        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1212        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1213          case 'B': do_debug = 1; break;
1214        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
1215        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1216        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1217        case 'F': do_flip = 1; break;        case 'F': do_flip = 1; break;
1218        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1219        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1220          case 'J': options |= PCRE_DUPNAMES; break;
1221        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1222        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1223    
# Line 826  while (!done) Line 1228  while (!done)
1228        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1229        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1230        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1231          case 'Z': debug_lengths = 0; break;
1232        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1233        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1234    
1235        case 'L':        case 'L':
1236        ppp = pp;        ppp = pp;
1237        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1238          /* The 0 (NUL) test is just in case this is an unterminated line. */
1239          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1240        *ppp = 0;        *ppp = 0;
1241        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1242          {          {
1243          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1244          goto SKIP_DATA;          goto SKIP_DATA;
1245          }          }
1246          locale_set = 1;
1247        tables = pcre_maketables();        tables = pcre_maketables();
1248        pp = ppp;        pp = ppp;
1249        break;        break;
# Line 849  while (!done) Line 1255  while (!done)
1255        *pp = 0;        *pp = 0;
1256        break;        break;
1257    
1258        case '\n': case ' ': break;        case '<':
1259            {
1260            if (strncmp((char *)pp, "JS>", 3) == 0)
1261              {
1262              options |= PCRE_JAVASCRIPT_COMPAT;
1263              pp += 3;
1264              }
1265            else
1266              {
1267              int x = check_newline(pp, outfile);
1268              if (x == 0) goto SKIP_DATA;
1269              options |= x;
1270              while (*pp++ != '>');
1271              }
1272            }
1273          break;
1274    
1275          case '\r':                      /* So that it works in Windows */
1276          case '\n':
1277          case ' ':
1278          break;
1279    
1280        default:        default:
1281        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
# Line 869  while (!done) Line 1295  while (!done)
1295    
1296      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1297      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1298        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1299        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1300        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1301    
1302      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1303    
1304      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 876  while (!done) Line 1306  while (!done)
1306    
1307      if (rc != 0)      if (rc != 0)
1308        {        {
1309        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1310        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1311        goto SKIP_DATA;        goto SKIP_DATA;
1312        }        }
# Line 888  while (!done) Line 1318  while (!done)
1318  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1319    
1320      {      {
1321      if (timeit)      if (timeit > 0)
1322        {        {
1323        register int i;        register int i;
1324        clock_t time_taken;        clock_t time_taken;
1325        clock_t start_time = clock();        clock_t start_time = clock();
1326        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1327          {          {
1328          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1329          if (re != NULL) free(re);          if (re != NULL) free(re);
1330          }          }
1331        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1332        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1333          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1334            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1335        }        }
1336    
# Line 917  while (!done) Line 1347  while (!done)
1347          {          {
1348          for (;;)          for (;;)
1349            {            {
1350            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1351              {              {
1352              done = 1;              done = 1;
1353              goto CONTINUE;              goto CONTINUE;
# Line 952  while (!done) Line 1382  while (!done)
1382    
1383      if (do_study)      if (do_study)
1384        {        {
1385        if (timeit)        if (timeit > 0)
1386          {          {
1387          register int i;          register int i;
1388          clock_t time_taken;          clock_t time_taken;
1389          clock_t start_time = clock();          clock_t start_time = clock();
1390          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
1391            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
1392          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1393          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
1394          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
1395            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeit) /
1396              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
1397          }          }
1398        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 980  while (!done) Line 1410  while (!done)
1410      if (do_flip)      if (do_flip)
1411        {        {
1412        real_pcre *rre = (real_pcre *)re;        real_pcre *rre = (real_pcre *)re;
1413        rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));        rre->magic_number =
1414            byteflip(rre->magic_number, sizeof(rre->magic_number));
1415        rre->size = byteflip(rre->size, sizeof(rre->size));        rre->size = byteflip(rre->size, sizeof(rre->size));
1416        rre->options = byteflip(rre->options, sizeof(rre->options));        rre->options = byteflip(rre->options, sizeof(rre->options));
1417        rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));        rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1418        rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));        rre->top_bracket =
1419        rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1420        rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));        rre->top_backref =
1421        rre->name_table_offset = byteflip(rre->name_table_offset,          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1422          rre->first_byte =
1423            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1424          rre->req_byte =
1425            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1426          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1427          sizeof(rre->name_table_offset));          sizeof(rre->name_table_offset));
1428        rre->name_entry_size = byteflip(rre->name_entry_size,        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1429          sizeof(rre->name_entry_size));          sizeof(rre->name_entry_size));
1430        rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));        rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1431            sizeof(rre->name_count));
1432    
1433        if (extra != NULL)        if (extra != NULL)
1434          {          {
# Line 1005  while (!done) Line 1442  while (!done)
1442    
1443      SHOW_INFO:      SHOW_INFO:
1444    
1445        if (do_debug)
1446          {
1447          fprintf(outfile, "------------------------------------------------------------------\n");
1448          pcre_printint(re, outfile, debug_lengths);
1449          }
1450    
1451      if (do_showinfo)      if (do_showinfo)
1452        {        {
1453        unsigned long int get_options, all_options;        unsigned long int get_options, all_options;
1454    #if !defined NOINFOCHECK
1455        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1456        int count, backrefmax, first_char, need_char;  #endif
1457          int count, backrefmax, first_char, need_char, okpartial, jchanged,
1458            hascrorlf;
1459        int nameentrysize, namecount;        int nameentrysize, namecount;
1460        const uschar *nametable;        const uschar *nametable;
1461    
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         print_internals(re, outfile);  
         }  
   
1462        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1463        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1464        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
# Line 1028  while (!done) Line 1468  while (!done)
1468        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1469        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1470        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1471          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1472          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1473          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1474    
1475    #if !defined NOINFOCHECK
1476        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1477        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1478          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 1046  while (!done) Line 1490  while (!done)
1490            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1491              get_options, old_options);              get_options, old_options);
1492          }          }
1493    #endif
1494    
1495        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1496          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1497          size, regex_gotten_store);          (int)size, (int)regex_gotten_store);
1498    
1499        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1500        if (backrefmax > 0)        if (backrefmax > 0)
# Line 1067  while (!done) Line 1512  while (!done)
1512            }            }
1513          }          }
1514    
1515        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1516        to fish it out via out back door */        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1517    
1518        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1519        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
         }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
1520    
1521        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1522          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1523            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1524            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1525            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1526            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1527              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1528            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1529              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1530              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1531            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1532            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1533            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1534              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1535            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1536            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1537              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1538    
1539          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1540    
1541          switch (get_options & PCRE_NEWLINE_BITS)
1542            {
1543            case PCRE_NEWLINE_CR:
1544            fprintf(outfile, "Forced newline sequence: CR\n");
1545            break;
1546    
1547            case PCRE_NEWLINE_LF:
1548            fprintf(outfile, "Forced newline sequence: LF\n");
1549            break;
1550    
1551            case PCRE_NEWLINE_CRLF:
1552            fprintf(outfile, "Forced newline sequence: CRLF\n");
1553            break;
1554    
1555            case PCRE_NEWLINE_ANYCRLF:
1556            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1557            break;
1558    
1559        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)          case PCRE_NEWLINE_ANY:
1560          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Forced newline sequence: ANY\n");
1561            break;
1562    
1563            default:
1564            break;
1565            }
1566    
1567        if (first_char == -1)        if (first_char == -1)
1568          {          {
1569          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1570          }          }
1571        else if (first_char < 0)        else if (first_char < 0)
1572          {          {
# Line 1108  while (!done) Line 1577  while (!done)
1577          int ch = first_char & 255;          int ch = first_char & 255;
1578          const char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1579            "" : " (caseless)";            "" : " (caseless)";
1580          if (isprint(ch))          if (PRINTHEX(ch))
1581            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1582          else          else
1583            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 1123  while (!done) Line 1592  while (!done)
1592          int ch = need_char & 255;          int ch = need_char & 255;
1593          const char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1594            "" : " (caseless)";            "" : " (caseless)";
1595          if (isprint(ch))          if (PRINTHEX(ch))
1596            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1597          else          else
1598            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
# Line 1159  while (!done) Line 1628  while (!done)
1628                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
1629                    c = 2;                    c = 2;
1630                    }                    }
1631                  if (isprint(i) && i != ' ')                  if (PRINTHEX(i) && i != ' ')
1632                    {                    {
1633                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
1634                    c += 2;                    c += 2;
# Line 1191  while (!done) Line 1660  while (!done)
1660        else        else
1661          {          {
1662          uschar sbuf[8];          uschar sbuf[8];
1663          sbuf[0] = (true_size >> 24)  & 255;          sbuf[0] = (uschar)((true_size >> 24) & 255);
1664          sbuf[1] = (true_size >> 16)  & 255;          sbuf[1] = (uschar)((true_size >> 16) & 255);
1665          sbuf[2] = (true_size >>  8)  & 255;          sbuf[2] = (uschar)((true_size >>  8) & 255);
1666          sbuf[3] = (true_size)  & 255;          sbuf[3] = (uschar)((true_size) & 255);
1667    
1668          sbuf[4] = (true_study_size >> 24)  & 255;          sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1669          sbuf[5] = (true_study_size >> 16)  & 255;          sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1670          sbuf[6] = (true_study_size >>  8)  & 255;          sbuf[6] = (uschar)((true_study_size >>  8) & 255);
1671          sbuf[7] = (true_study_size)  & 255;          sbuf[7] = (uschar)((true_study_size) & 255);
1672    
1673          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
1674              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1218  while (!done) Line 1687  while (!done)
1687                  strerror(errno));                  strerror(errno));
1688                }                }
1689              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
1690    
1691              }              }
1692            }            }
1693          fclose(f);          fclose(f);
1694          }          }
1695    
1696          new_free(re);
1697          if (extra != NULL) new_free(extra);
1698          if (tables != NULL) new_free((void *)tables);
1699        continue;  /* With next regex */        continue;  /* With next regex */
1700        }        }
1701      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1230  while (!done) Line 1704  while (!done)
1704    
1705    for (;;)    for (;;)
1706      {      {
1707      unsigned char *q;      uschar *q;
1708      unsigned char *bptr = dbuffer;      uschar *bptr;
1709      int *use_offsets = offsets;      int *use_offsets = offsets;
1710      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1711      int callout_data = 0;      int callout_data = 0;
1712      int callout_data_set = 0;      int callout_data_set = 0;
1713      int count, c;      int count, c;
1714      int copystrings = 0;      int copystrings = 0;
1715      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
1716      int getstrings = 0;      int getstrings = 0;
1717      int getlist = 0;      int getlist = 0;
1718      int gmatched = 0;      int gmatched = 0;
1719      int start_offset = 0;      int start_offset = 0;
1720      int g_notempty = 0;      int g_notempty = 0;
1721        int use_dfa = 0;
1722    
1723      options = 0;      options = 0;
1724    
1725        *copynames = 0;
1726        *getnames = 0;
1727    
1728        copynamesptr = copynames;
1729        getnamesptr = getnames;
1730    
1731      pcre_callout = callout;      pcre_callout = callout;
1732      first_callout = 1;      first_callout = 1;
1733      callout_extra = 0;      callout_extra = 0;
# Line 1255  while (!done) Line 1736  while (!done)
1736      callout_fail_id = -1;      callout_fail_id = -1;
1737      show_malloc = 0;      show_malloc = 0;
1738    
1739      if (infile == stdin) printf("data> ");      if (extra != NULL) extra->flags &=
1740      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1741    
1742        len = 0;
1743        for (;;)
1744        {        {
1745        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1746        goto CONTINUE;          {
1747            if (len > 0) break;
1748            done = 1;
1749            goto CONTINUE;
1750            }
1751          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1752          len = (int)strlen((char *)buffer);
1753          if (buffer[len-1] == '\n') break;
1754        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1755    
     len = (int)strlen((char *)buffer);  
1756      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1757      buffer[len] = 0;      buffer[len] = 0;
1758      if (len == 0) break;      if (len == 0) break;
# Line 1271  while (!done) Line 1760  while (!done)
1760      p = buffer;      p = buffer;
1761      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1762    
1763      q = dbuffer;      bptr = q = dbuffer;
1764      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1765        {        {
1766        int i = 0;        int i = 0;
# Line 1293  while (!done) Line 1782  while (!done)
1782          c -= '0';          c -= '0';
1783          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1784            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1785    
1786    #if !defined NOUTF8
1787            if (use_utf8 && c > 255)
1788              {
1789              unsigned char buff8[8];
1790              int ii, utn;
1791              utn = ord2utf8(c, buff8);
1792              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1793              c = buff8[ii];   /* Last byte */
1794              }
1795    #endif
1796          break;          break;
1797    
1798          case 'x':          case 'x':
1799    
1800          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
1801    
1802    #if !defined NOUTF8
1803          if (*p == '{')          if (*p == '{')
1804            {            {
1805            unsigned char *pt = p;            unsigned char *pt = p;
# Line 1309  while (!done) Line 1810  while (!done)
1810              {              {
1811              unsigned char buff8[8];              unsigned char buff8[8];
1812              int ii, utn;              int ii, utn;
1813              utn = ord2utf8(c, buff8);              if (use_utf8)
1814              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                {
1815              c = buff8[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
1816                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1817                  c = buff8[ii];   /* Last byte */
1818                  }
1819                else
1820                 {
1821                 if (c > 255)
1822                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1823                     "UTF-8 mode is not enabled.\n"
1824                     "** Truncation will probably give the wrong result.\n", c);
1825                 }
1826              p = pt + 1;              p = pt + 1;
1827              break;              break;
1828              }              }
1829            /* Not correct form; fall through */            /* Not correct form; fall through */
1830            }            }
1831    #endif
1832    
1833          /* Ordinary \x */          /* Ordinary \x */
1834    
# Line 1352  while (!done) Line 1864  while (!done)
1864            }            }
1865          else if (isalnum(*p))          else if (isalnum(*p))
1866            {            {
1867            uschar name[256];            uschar *npp = copynamesptr;
           uschar *npp = name;  
1868            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1869              *npp++ = 0;
1870            *npp = 0;            *npp = 0;
1871            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
1872            if (n < 0)            if (n < 0)
1873              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1874            else copystrings |= 1 << n;            copynamesptr = npp;
1875            }            }
1876          else if (*p == '+')          else if (*p == '+')
1877            {            {
# Line 1397  while (!done) Line 1909  while (!done)
1909            }            }
1910          continue;          continue;
1911    
1912    #if !defined NODFA
1913            case 'D':
1914    #if !defined NOPOSIX
1915            if (posix || do_posix)
1916              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1917            else
1918    #endif
1919              use_dfa = 1;
1920            continue;
1921    
1922            case 'F':
1923            options |= PCRE_DFA_SHORTEST;
1924            continue;
1925    #endif
1926    
1927          case 'G':          case 'G':
1928          if (isdigit(*p))          if (isdigit(*p))
1929            {            {
# Line 1405  while (!done) Line 1932  while (!done)
1932            }            }
1933          else if (isalnum(*p))          else if (isalnum(*p))
1934            {            {
1935            uschar name[256];            uschar *npp = getnamesptr;
           uschar *npp = name;  
1936            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1937              *npp++ = 0;
1938            *npp = 0;            *npp = 0;
1939            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
1940            if (n < 0)            if (n < 0)
1941              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1942            else getstrings |= 1 << n;            getnamesptr = npp;
1943            }            }
1944          continue;          continue;
1945    
# Line 1438  while (!done) Line 1965  while (!done)
1965            if (offsets == NULL)            if (offsets == NULL)
1966              {              {
1967              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1968                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1969              return 1;              yield = 1;
1970                goto EXIT;
1971              }              }
1972            }            }
1973          use_size_offsets = n;          use_size_offsets = n;
# Line 1450  while (!done) Line 1978  while (!done)
1978          options |= PCRE_PARTIAL;          options |= PCRE_PARTIAL;
1979          continue;          continue;
1980    
1981            case 'Q':
1982            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1983            if (extra == NULL)
1984              {
1985              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1986              extra->flags = 0;
1987              }
1988            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1989            extra->match_limit_recursion = n;
1990            continue;
1991    
1992            case 'q':
1993            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1994            if (extra == NULL)
1995              {
1996              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1997              extra->flags = 0;
1998              }
1999            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2000            extra->match_limit = n;
2001            continue;
2002    
2003    #if !defined NODFA
2004            case 'R':
2005            options |= PCRE_DFA_RESTART;
2006            continue;
2007    #endif
2008    
2009          case 'S':          case 'S':
2010          show_malloc = 1;          show_malloc = 1;
2011          continue;          continue;
# Line 1461  while (!done) Line 2017  while (!done)
2017          case '?':          case '?':
2018          options |= PCRE_NO_UTF8_CHECK;          options |= PCRE_NO_UTF8_CHECK;
2019          continue;          continue;
2020    
2021            case '<':
2022              {
2023              int x = check_newline(p, outfile);
2024              if (x == 0) goto NEXT_DATA;
2025              options |= x;
2026              while (*p++ != '>');
2027              }
2028            continue;
2029          }          }
2030        *q++ = c;        *q++ = c;
2031        }        }
2032      *q = 0;      *q = 0;
2033      len = q - dbuffer;      len = q - dbuffer;
2034    
2035        /* Move the data to the end of the buffer so that a read over the end of
2036        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2037        we are using the POSIX interface, we must include the terminating zero. */
2038    
2039    #if !defined NOPOSIX
2040        if (posix || do_posix)
2041          {
2042          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2043          bptr += buffer_size - len - 1;
2044          }
2045        else
2046    #endif
2047          {
2048          memmove(bptr + buffer_size - len, bptr, len);
2049          bptr += buffer_size - len;
2050          }
2051    
2052        if ((all_use_dfa || use_dfa) && find_match_limit)
2053          {
2054          printf("**Match limit not relevant for DFA matching: ignored\n");
2055          find_match_limit = 0;
2056          }
2057    
2058      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
2059      support timing or playing with the match limit or callout data. */      support timing or playing with the match limit or callout data. */
2060    
# Line 1480  while (!done) Line 2068  while (!done)
2068          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2069        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2070        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2071          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2072    
2073        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2074    
2075        if (rc != 0)        if (rc != 0)
2076          {          {
2077          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2078          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2079          }          }
2080          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2081                  != 0)
2082            {
2083            fprintf(outfile, "Matched with REG_NOSUB\n");
2084            }
2085        else        else
2086          {          {
2087          size_t i;          size_t i;
# Line 1519  while (!done) Line 2113  while (!done)
2113    
2114      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2115        {        {
2116        if (timeit)        if (timeitm > 0)
2117          {          {
2118          register int i;          register int i;
2119          clock_t time_taken;          clock_t time_taken;
2120          clock_t start_time = clock();          clock_t start_time = clock();
2121          for (i = 0; i < LOOPREPEAT; i++)  
2122    #if !defined NODFA
2123            if (all_use_dfa || use_dfa)
2124              {
2125              int workspace[1000];
2126              for (i = 0; i < timeitm; i++)
2127                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2128                  options | g_notempty, use_offsets, use_size_offsets, workspace,
2129                  sizeof(workspace)/sizeof(int));
2130              }
2131            else
2132    #endif
2133    
2134            for (i = 0; i < timeitm; i++)
2135            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2136              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2137    
2138          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2139          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2140            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
2141              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2142          }          }
2143    
2144        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
2145        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
2146          for the recursion limit. */
2147    
2148        if (find_match_limit)        if (find_match_limit)
2149          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
2150          if (extra == NULL)          if (extra == NULL)
2151            {            {
2152            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2153            extra->flags = 0;            extra->flags = 0;
2154            }            }
         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;  
   
         for (;;)  
           {  
           extra->match_limit = mid;  
           count = pcre_exec(re, extra, (char *)bptr, len, start_offset,  
             options | g_notempty, use_offsets, use_size_offsets);  
           if (count == PCRE_ERROR_MATCHLIMIT)  
             {  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             min = mid;  
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||  
                                  count == PCRE_ERROR_PARTIAL)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
2155    
2156          extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;          (void)check_match_limit(re, extra, bptr, len, start_offset,
2157              options|g_notempty, use_offsets, use_size_offsets,
2158              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2159              PCRE_ERROR_MATCHLIMIT, "match()");
2160    
2161            count = check_match_limit(re, extra, bptr, len, start_offset,
2162              options|g_notempty, use_offsets, use_size_offsets,
2163              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2164              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2165          }          }
2166    
2167        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1597  while (!done) Line 2183  while (!done)
2183        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
2184        value of match_limit. */        value of match_limit. */
2185    
2186        else  #if !defined NODFA
2187          else if (all_use_dfa || use_dfa)
2188          {          {
2189          count = pcre_exec(re, extra, (char *)bptr, len,          int workspace[1000];
2190            start_offset, options | g_notempty, use_offsets, use_size_offsets);          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2191              options | g_notempty, use_offsets, use_size_offsets, workspace,
2192              sizeof(workspace)/sizeof(int));
2193            if (count == 0)
2194              {
2195              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2196              count = use_size_offsets/2;
2197              }
2198          }          }
2199    #endif
2200    
2201        if (count == 0)        else
2202          {          {
2203          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2204          count = use_size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2205            if (count == 0)
2206              {
2207              fprintf(outfile, "Matched, but too many substrings\n");
2208              count = use_size_offsets/3;
2209              }
2210          }          }
2211    
2212        /* Matched */        /* Matched */
2213    
2214        if (count >= 0)        if (count >= 0)
2215          {          {
2216          int i;          int i, maxcount;
2217    
2218    #if !defined NODFA
2219            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2220    #endif
2221              maxcount = use_size_offsets/3;
2222    
2223            /* This is a check against a lunatic return value. */
2224    
2225            if (count > maxcount)
2226              {
2227              fprintf(outfile,
2228                "** PCRE error: returned count %d is too big for offset size %d\n",
2229                count, use_size_offsets);
2230              count = use_size_offsets/3;
2231              if (do_g || do_G)
2232                {
2233                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2234                do_g = do_G = FALSE;        /* Break g/G loop */
2235                }
2236              }
2237    
2238          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2239            {            {
2240            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 1641  while (!done) Line 2262  while (!done)
2262            {            {
2263            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2264              {              {
2265              char copybuffer[16];              char copybuffer[256];
2266              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2267                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2268              if (rc < 0)              if (rc < 0)
# Line 1651  while (!done) Line 2272  while (!done)
2272              }              }
2273            }            }
2274    
2275            for (copynamesptr = copynames;
2276                 *copynamesptr != 0;
2277                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2278              {
2279              char copybuffer[256];
2280              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2281                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2282              if (rc < 0)
2283                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2284              else
2285                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2286              }
2287    
2288          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2289            {            {
2290            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1663  while (!done) Line 2297  while (!done)
2297              else              else
2298                {                {
2299                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2300                pcre_free_substring(substring);                pcre_free_substring(substring);
2301                }                }
2302              }              }
2303            }            }
2304    
2305            for (getnamesptr = getnames;
2306                 *getnamesptr != 0;
2307                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2308              {
2309              const char *substring;
2310              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2311                count, (char *)getnamesptr, &substring);
2312              if (rc < 0)
2313                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2314              else
2315                {
2316                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2317                pcre_free_substring(substring);
2318                }
2319              }
2320    
2321          if (getlist)          if (getlist)
2322            {            {
2323            const char **stringlist;            const char **stringlist;
# Line 1692  while (!done) Line 2341  while (!done)
2341    
2342        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
2343          {          {
2344          fprintf(outfile, "Partial match\n");          fprintf(outfile, "Partial match");
2345    #if !defined NODFA
2346            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2347              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2348                bptr + use_offsets[0]);
2349    #endif
2350            fprintf(outfile, "\n");
2351          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
2352          }          }
2353    
2354        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2355        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2356        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2357        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2358        offset values to achieve this. We won't be at the end of the string -  
2359        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any" or
2360          "anycrlf". If the previous match was at the end of a line terminated by
2361          CRLF, an advance of one character just passes the \r, whereas we should
2362          prefer the longer newline sequence, as does the code in pcre_exec().
2363          Fudge the offset value to achieve this.
2364    
2365          Otherwise, in the case of UTF-8 matching, the advance must be one
2366          character, not one byte. */
2367    
2368        else        else
2369          {          {
2370          if (g_notempty != 0)          if (g_notempty != 0)
2371            {            {
2372            int onechar = 1;            int onechar = 1;
2373              unsigned int obits = ((real_pcre *)re)->options;
2374            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2375            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2376                {
2377                int d;
2378                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2379                obits = (d == '\r')? PCRE_NEWLINE_CR :
2380                        (d == '\n')? PCRE_NEWLINE_LF :
2381                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2382                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2383                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2384                }
2385              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2386                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2387                  &&
2388                  start_offset < len - 1 &&
2389                  bptr[start_offset] == '\r' &&
2390                  bptr[start_offset+1] == '\n')
2391                onechar++;
2392              else if (use_utf8)
2393              {              {
2394              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2395                {                {
# Line 1744  while (!done) Line 2424  while (!done)
2424        character. */        character. */
2425    
2426        g_notempty = 0;        g_notempty = 0;
2427    
2428        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2429          {          {
2430          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
# Line 1762  while (!done) Line 2443  while (!done)
2443          len -= use_offsets[1];          len -= use_offsets[1];
2444          }          }
2445        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2446    
2447        NEXT_DATA: continue;
2448      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2449    
2450    CONTINUE:    CONTINUE:
# Line 1770  while (!done) Line 2453  while (!done)
2453    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2454  #endif  #endif
2455    
2456    if (re != NULL) free(re);    if (re != NULL) new_free(re);
2457    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
2458    if (tables != NULL)    if (tables != NULL)
2459      {      {
2460      free((void *)tables);      new_free((void *)tables);
2461      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2462        locale_set = 0;
2463      }      }
2464    }    }
2465    
2466  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2467  return 0;  
2468    EXIT:
2469    
2470    if (infile != NULL && infile != stdin) fclose(infile);
2471    if (outfile != NULL && outfile != stdout) fclose(outfile);
2472    
2473    free(buffer);
2474    free(dbuffer);
2475    free(pbuffer);
2476    free(offsets);
2477    
2478    return yield;
2479  }  }
2480    
2481  /* End */  /* End of pcretest.c */

Legend:
Removed from v.75  
changed lines
  Added in v.388

  ViewVC Help
Powered by ViewVC 1.1.5