/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 355 by ph10, Mon Jul 7 17:45:23 2008 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 48  POSSIBILITY OF SUCH DAMAGE.
48  #include <locale.h>  #include <locale.h>
49  #include <errno.h>  #include <errno.h>
50    
51  #define PCRE_SPY        /* For Win32 build, import data, not export */  #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60    /* A number of things vary for Windows builds. Originally, pcretest opened its
61    input and output without "b"; then I was told that "b" was needed in some
62    environments, so it was added for release 5.0 to both the input and output. (It
63    makes no difference on Unix-like systems.) Later I was told that it is wrong
64    for the input on Windows. I've now abstracted the modes into two macros that
65    are set here, to make it easier to fiddle with them, and removed "b" from the
66    input mode under Windows. */
67    
68    #if defined(_WIN32) || defined(WIN32)
69    #include <io.h>                /* For _setmode() */
70    #include <fcntl.h>             /* For _O_BINARY */
71    #define INPUT_MODE   "r"
72    #define OUTPUT_MODE  "wb"
73    
74    #define isatty _isatty         /* This is what Windows calls them, I'm told */
75    #define fileno _fileno
76    
77    #else
78    #include <sys/time.h>          /* These two includes are needed */
79    #include <sys/resource.h>      /* for setrlimit(). */
80    #define INPUT_MODE   "rb"
81    #define OUTPUT_MODE  "wb"
82    #endif
83    
84    
85  /* We need the internal info for displaying the results of pcre_study() and  /* We have to include pcre_internal.h because we need the internal info for
86  other internal data; pcretest also uses some of the fixed tables, and generally  displaying the results of pcre_study() and we also need to know about the
87  has "inside information" compared to a program that strictly follows the PCRE  internal macros, structures, and other internal data values; pcretest has
88  API. */  "inside information" compared to a program that strictly follows the PCRE API.
89    
90    Although pcre_internal.h does itself include pcre.h, we explicitly include it
91    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
92    appropriately for an application, not for building PCRE. */
93    
94    #include "pcre.h"
95  #include "pcre_internal.h"  #include "pcre_internal.h"
96    
97    /* We need access to some of the data tables that PCRE uses. So as not to have
98    to keep two copies, we include the source file here, changing the names of the
99    external symbols to prevent clashes. */
100    
101    #define _pcre_ucp_gentype      ucp_gentype
102    #define _pcre_utf8_table1      utf8_table1
103    #define _pcre_utf8_table1_size utf8_table1_size
104    #define _pcre_utf8_table2      utf8_table2
105    #define _pcre_utf8_table3      utf8_table3
106    #define _pcre_utf8_table4      utf8_table4
107    #define _pcre_utt              utt
108    #define _pcre_utt_size         utt_size
109    #define _pcre_utt_names        utt_names
110    #define _pcre_OP_lengths       OP_lengths
111    
112    #include "pcre_tables.c"
113    
114    /* We also need the pcre_printint() function for printing out compiled
115    patterns. This function is in a separate file so that it can be included in
116    pcre_compile.c when that module is compiled with debugging enabled.
117    
118    The definition of the macro PRINTABLE, which determines whether to print an
119    output character as-is or as a hex value when showing compiled patterns, is
120    contained in that file. We use it here also, in cases when the locale has not
121    been explicitly changed, so as to get consistent output from systems that
122    differ in their output from isprint() even in the "C" locale. */
123    
124    #include "pcre_printint.src"
125    
126    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
127    
128    
129  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
130  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 62  Makefile. */ Line 134  Makefile. */
134  #include "pcreposix.h"  #include "pcreposix.h"
135  #endif  #endif
136    
137    /* It is also possible, for the benefit of the version currently imported into
138    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
139    interface to the DFA matcher (NODFA), and without the doublecheck of the old
140    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
141    UTF8 support if PCRE is built without it. */
142    
143    #ifndef SUPPORT_UTF8
144    #ifndef NOUTF8
145    #define NOUTF8
146    #endif
147    #endif
148    
149    
150    /* Other parameters */
151    
152  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
153  #ifdef CLK_TCK  #ifdef CLK_TCK
154  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 70  Makefile. */ Line 157  Makefile. */
157  #endif  #endif
158  #endif  #endif
159    
160  #define LOOPREPEAT 500000  /* This is the default loop count for timing. */
161    
162  #define BUFFER_SIZE 30000  #define LOOPREPEAT 500000
 #define PBUFFER_SIZE BUFFER_SIZE  
 #define DBUFFER_SIZE BUFFER_SIZE  
163    
164    /* Static variables */
165    
166  static FILE *outfile;  static FILE *outfile;
167  static int log_store = 0;  static int log_store = 0;
# Line 83  static int callout_count; Line 169  static int callout_count;
169  static int callout_extra;  static int callout_extra;
170  static int callout_fail_count;  static int callout_fail_count;
171  static int callout_fail_id;  static int callout_fail_id;
172    static int debug_lengths;
173  static int first_callout;  static int first_callout;
174    static int locale_set = 0;
175  static int show_malloc;  static int show_malloc;
176  static int use_utf8;  static int use_utf8;
177  static size_t gotten_store;  static size_t gotten_store;
178    
179    /* The buffers grow automatically if very long input lines are encountered. */
180    
181    static int buffer_size = 50000;
182    static uschar *buffer = NULL;
183    static uschar *dbuffer = NULL;
184  static uschar *pbuffer = NULL;  static uschar *pbuffer = NULL;
185    
186    
187    
188  /*************************************************  /*************************************************
189    *        Read or extend an input line            *
190    *************************************************/
191    
192    /* Input lines are read into buffer, but both patterns and data lines can be
193    continued over multiple input lines. In addition, if the buffer fills up, we
194    want to automatically expand it so as to be able to handle extremely large
195    lines that are needed for certain stress tests. When the input buffer is
196    expanded, the other two buffers must also be expanded likewise, and the
197    contents of pbuffer, which are a copy of the input for callouts, must be
198    preserved (for when expansion happens for a data line). This is not the most
199    optimal way of handling this, but hey, this is just a test program!
200    
201    Arguments:
202      f            the file to read
203      start        where in buffer to start (this *must* be within buffer)
204      prompt       for stdin or readline()
205    
206    Returns:       pointer to the start of new data
207                   could be a copy of start, or could be moved
208                   NULL if no data read and EOF reached
209    */
210    
211    static uschar *
212    extend_inputline(FILE *f, uschar *start, const char *prompt)
213    {
214    uschar *here = start;
215    
216    for (;;)
217      {
218      int rlen = buffer_size - (here - buffer);
219    
220      if (rlen > 1000)
221        {
222        int dlen;
223    
224        /* If libreadline support is required, use readline() to read a line if the
225        input is a terminal. Note that readline() removes the trailing newline, so
226        we must put it back again, to be compatible with fgets(). */
227    
228    #ifdef SUPPORT_LIBREADLINE
229        if (isatty(fileno(f)))
230          {
231          size_t len;
232          char *s = readline(prompt);
233          if (s == NULL) return (here == start)? NULL : start;
234          len = strlen(s);
235          if (len > 0) add_history(s);
236          if (len > rlen - 1) len = rlen - 1;
237          memcpy(here, s, len);
238          here[len] = '\n';
239          here[len+1] = 0;
240          free(s);
241          }
242        else
243    #endif
244    
245        /* Read the next line by normal means, prompting if the file is stdin. */
246    
247          {
248          if (f == stdin) printf(prompt);
249          if (fgets((char *)here, rlen,  f) == NULL)
250            return (here == start)? NULL : start;
251          }
252    
253        dlen = (int)strlen((char *)here);
254        if (dlen > 0 && here[dlen - 1] == '\n') return start;
255        here += dlen;
256        }
257    
258      else
259        {
260        int new_buffer_size = 2*buffer_size;
261        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
262        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
263        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
264    
265        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
266          {
267          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
268          exit(1);
269          }
270    
271        memcpy(new_buffer, buffer, buffer_size);
272        memcpy(new_pbuffer, pbuffer, buffer_size);
273    
274        buffer_size = new_buffer_size;
275    
276        start = new_buffer + (start - buffer);
277        here = new_buffer + (here - buffer);
278    
279        free(buffer);
280        free(dbuffer);
281        free(pbuffer);
282    
283        buffer = new_buffer;
284        dbuffer = new_dbuffer;
285        pbuffer = new_pbuffer;
286        }
287      }
288    
289    return NULL;  /* Control never gets here */
290    }
291    
292    
293    
294    
295    
296    
297    
298    /*************************************************
299  *          Read number from string               *  *          Read number from string               *
300  *************************************************/  *************************************************/
301    
302  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
303  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
304  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
305    
306  Arguments:  Arguments:
307    str           string to be converted    str           string to be converted
# Line 128  return(result); Line 331  return(result);
331  and returns the value of the character.  and returns the value of the character.
332    
333  Argument:  Argument:
334    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
335    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
336    
337  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
338             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
339  */  */
340    
341    #if !defined NOUTF8
342    
343  static int  static int
344  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *utf8bytes, int *vptr)
345  {  {
346  int c = *buffer++;  int c = *utf8bytes++;
347  int d = c;  int d = c;
348  int i, j, s;  int i, j, s;
349    
# Line 154  if (i == 0 || i == 6) return 0;        / Line 359  if (i == 0 || i == 6) return 0;        /
359  /* i now has a value in the range 1-5 */  /* i now has a value in the range 1-5 */
360    
361  s = 6*i;  s = 6*i;
362  d = (c & _pcre_utf8_table3[i]) << s;  d = (c & utf8_table3[i]) << s;
363    
364  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
365    {    {
366    c = *buffer++;    c = *utf8bytes++;
367    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
368    s -= 6;    s -= 6;
369    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 166  for (j = 0; j < i; j++) Line 371  for (j = 0; j < i; j++)
371    
372  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
373    
374  for (j = 0; j < _pcre_utf8_table1_size; j++)  for (j = 0; j < utf8_table1_size; j++)
375    if (d <= _pcre_utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
376  if (j != i) return -(i+1);  if (j != i) return -(i+1);
377    
378  /* Valid value */  /* Valid value */
# Line 176  if (j != i) return -(i+1); Line 381  if (j != i) return -(i+1);
381  return i+1;  return i+1;
382  }  }
383    
384    #endif
385    
386    
387    
388    /*************************************************
389    *       Convert character value to UTF-8         *
390    *************************************************/
391    
392    /* This function takes an integer value in the range 0 - 0x7fffffff
393    and encodes it as a UTF-8 character in 0 to 6 bytes.
394    
395    Arguments:
396      cvalue     the character value
397      utf8bytes  pointer to buffer for result - at least 6 bytes long
398    
399    Returns:     number of characters placed in the buffer
400    */
401    
402    #if !defined NOUTF8
403    
404    static int
405    ord2utf8(int cvalue, uschar *utf8bytes)
406    {
407    register int i, j;
408    for (i = 0; i < utf8_table1_size; i++)
409      if (cvalue <= utf8_table1[i]) break;
410    utf8bytes += i;
411    for (j = i; j > 0; j--)
412     {
413     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
414     cvalue >>= 6;
415     }
416    *utf8bytes = utf8_table2[i] | cvalue;
417    return i + 1;
418    }
419    
420    #endif
421    
422    
423    
424  /*************************************************  /*************************************************
# Line 188  chars without printing. */ Line 431  chars without printing. */
431    
432  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(unsigned char *p, int length, FILE *f)
433  {  {
434  int c;  int c = 0;
435  int yield = 0;  int yield = 0;
436    
437  while (length-- > 0)  while (length-- > 0)
438    {    {
439    #if !defined NOUTF8
440    if (use_utf8)    if (use_utf8)
441      {      {
442      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
# Line 201  while (length-- > 0) Line 445  while (length-- > 0)
445        {        {
446        length -= rc - 1;        length -= rc - 1;
447        p += rc;        p += rc;
448        if (c < 256 && isprint(c))        if (PRINTHEX(c))
449          {          {
450          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
451          yield++;          yield++;
452          }          }
453        else        else
454          {          {
455          int n;          int n = 4;
456          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
457          yield += n;          yield += (n <= 0x000000ff)? 2 :
458                     (n <= 0x00000fff)? 3 :
459                     (n <= 0x0000ffff)? 4 :
460                     (n <= 0x000fffff)? 5 : 6;
461          }          }
462        continue;        continue;
463        }        }
464      }      }
465    #endif
466    
467     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
468    
469    if (isprint(c = *(p++)))    c = *p++;
470      if (PRINTHEX(c))
471      {      {
472      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
473      yield++;      yield++;
# Line 392  if ((rc = pcre_fullinfo(re, study, optio Line 641  if ((rc = pcre_fullinfo(re, study, optio
641  *         Byte flipping function                 *  *         Byte flipping function                 *
642  *************************************************/  *************************************************/
643    
644  static long int  static unsigned long int
645  byteflip(long int value, int n)  byteflip(unsigned long int value, int n)
646  {  {
647  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
648  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
# Line 406  return ((value & 0x000000ff) << 24) | Line 655  return ((value & 0x000000ff) << 24) |
655    
656    
657  /*************************************************  /*************************************************
658    *        Check match or recursion limit          *
659    *************************************************/
660    
661    static int
662    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
663      int start_offset, int options, int *use_offsets, int use_size_offsets,
664      int flag, unsigned long int *limit, int errnumber, const char *msg)
665    {
666    int count;
667    int min = 0;
668    int mid = 64;
669    int max = -1;
670    
671    extra->flags |= flag;
672    
673    for (;;)
674      {
675      *limit = mid;
676    
677      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
678        use_offsets, use_size_offsets);
679    
680      if (count == errnumber)
681        {
682        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
683        min = mid;
684        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
685        }
686    
687      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
688                             count == PCRE_ERROR_PARTIAL)
689        {
690        if (mid == min + 1)
691          {
692          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
693          break;
694          }
695        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
696        max = mid;
697        mid = (min + mid)/2;
698        }
699      else break;    /* Some other error */
700      }
701    
702    extra->flags &= ~flag;
703    return count;
704    }
705    
706    
707    
708    /*************************************************
709    *         Case-independent strncmp() function    *
710    *************************************************/
711    
712    /*
713    Arguments:
714      s         first string
715      t         second string
716      n         number of characters to compare
717    
718    Returns:    < 0, = 0, or > 0, according to the comparison
719    */
720    
721    static int
722    strncmpic(uschar *s, uschar *t, int n)
723    {
724    while (n--)
725      {
726      int c = tolower(*s++) - tolower(*t++);
727      if (c) return c;
728      }
729    return 0;
730    }
731    
732    
733    
734    /*************************************************
735    *         Check newline indicator                *
736    *************************************************/
737    
738    /* This is used both at compile and run-time to check for <xxx> escapes, where
739    xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
740    no match.
741    
742    Arguments:
743      p           points after the leading '<'
744      f           file for error message
745    
746    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
747    */
748    
749    static int
750    check_newline(uschar *p, FILE *f)
751    {
752    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
753    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
754    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
755    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
756    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
757    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
758    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
759    fprintf(f, "Unknown newline type at: <%s\n", p);
760    return 0;
761    }
762    
763    
764    
765    /*************************************************
766    *             Usage function                     *
767    *************************************************/
768    
/* Writes the command-line summary to stdout. The lines that describe
readline(), -dfa, and -p depend on SUPPORT_LIBREADLINE, NODFA, and NOPOSIX
respectively. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);

#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif

fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif

fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
801    
802    
803    
804    /*************************************************
805  *                Main Program                    *  *                Main Program                    *
806  *************************************************/  *************************************************/
807    
# Line 420  int options = 0; Line 816  int options = 0;
816  int study_options = 0;  int study_options = 0;
817  int op = 1;  int op = 1;
818  int timeit = 0;  int timeit = 0;
819    int timeitm = 0;
820  int showinfo = 0;  int showinfo = 0;
821  int showstore = 0;  int showstore = 0;
822    int quiet = 0;
823  int size_offsets = 45;  int size_offsets = 45;
824  int size_offsets_max;  int size_offsets_max;
825  int *offsets = NULL;  int *offsets = NULL;
# Line 432  int debug = 0; Line 830  int debug = 0;
830  int done = 0;  int done = 0;
831  int all_use_dfa = 0;  int all_use_dfa = 0;
832  int yield = 0;  int yield = 0;
833    int stack_size;
834    
835    /* These vectors store, end-to-end, a list of captured substring names. Assume
836    that 1024 is plenty long enough for the few names we'll be testing. */
837    
838    uschar copynames[1024];
839    uschar getnames[1024];
840    
841  unsigned char *buffer;  uschar *copynamesptr;
842  unsigned char *dbuffer;  uschar *getnamesptr;
843    
844  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that Electric Fence will check their misuse
845  when I am debugging. */  when I am debugging. They grow automatically when very long lines are read. */
846    
847  buffer = (unsigned char *)malloc(BUFFER_SIZE);  buffer = (unsigned char *)malloc(buffer_size);
848  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);  dbuffer = (unsigned char *)malloc(buffer_size);
849  pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);  pbuffer = (unsigned char *)malloc(buffer_size);
   
 /* The outfile variable is static so that new_malloc can use it. The _setmode()  
 stuff is some magic that I don't understand, but which apparently does good  
 things in Windows. It's related to line terminations.  */  
850    
851  #if defined(_WIN32) || defined(WIN32)  /* The outfile variable is static so that new_malloc can use it. */
 _setmode( _fileno( stdout ), 0x8000 );  
 #endif  /* defined(_WIN32) || defined(WIN32) */  
852    
853  outfile = stdout;  outfile = stdout;
854    
855    /* The following  _setmode() stuff is some Windows magic that tells its runtime
856    library to translate CRLF into a single LF character. At least, that's what
857    I've been told: never having used Windows I take this all on trust. Originally
858    it set 0x8000, but then I was advised that _O_BINARY was better. */
859    
860    #if defined(_WIN32) || defined(WIN32)
861    _setmode( _fileno( stdout ), _O_BINARY );
862    #endif
863    
864  /* Scan options */  /* Scan options */
865    
866  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
# Line 461  while (argc > 1 && argv[op][0] == '-') Line 869  while (argc > 1 && argv[op][0] == '-')
869    
870    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
871      showstore = 1;      showstore = 1;
872    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
873      else if (strcmp(argv[op], "-b") == 0) debug = 1;
874    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
875    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
876    #if !defined NODFA
877    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
878    #endif
879    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
880        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
881          *endptr == 0))          *endptr == 0))
# Line 472  while (argc > 1 && argv[op][0] == '-') Line 883  while (argc > 1 && argv[op][0] == '-')
883      op++;      op++;
884      argc--;      argc--;
885      }      }
886      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
887        {
888        int both = argv[op][2] == 0;
889        int temp;
890        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
891                         *endptr == 0))
892          {
893          timeitm = temp;
894          op++;
895          argc--;
896          }
897        else timeitm = LOOPREPEAT;
898        if (both) timeit = timeitm;
899        }
900      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
901          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
902            *endptr == 0))
903        {
904    #if defined(_WIN32) || defined(WIN32)
905        printf("PCRE: -S not supported on this OS\n");
906        exit(1);
907    #else
908        int rc;
909        struct rlimit rlim;
910        getrlimit(RLIMIT_STACK, &rlim);
911        rlim.rlim_cur = stack_size * 1024 * 1024;
912        rc = setrlimit(RLIMIT_STACK, &rlim);
913        if (rc != 0)
914          {
915        printf("PCRE: setrlimit() failed with error %d\n", rc);
916        exit(1);
917          }
918        op++;
919        argc--;
920    #endif
921        }
922  #if !defined NOPOSIX  #if !defined NOPOSIX
923    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
924  #endif  #endif
# Line 485  while (argc > 1 && argv[op][0] == '-') Line 932  while (argc > 1 && argv[op][0] == '-')
932      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
933      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
934      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
935      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
936          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
937          (rc == -2)? "ANYCRLF" :
938          (rc == -1)? "ANY" : "???");
939        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
940        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
941                                         "all Unicode newlines");
942      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
943      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
944      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
945      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
946      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
947      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %d\n", rc);
948        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
949        printf("  Default recursion depth limit = %d\n", rc);
950      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
951      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
952      exit(0);      goto EXIT;
953        }
954      else if (strcmp(argv[op], "-help") == 0 ||
955               strcmp(argv[op], "--help") == 0)
956        {
957        usage();
958        goto EXIT;
959      }      }
960    else    else
961      {      {
962      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
963      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
     printf("  -C     show PCRE compile-time options and exit\n");  
     printf("  -d     debug: show compiled code; implies -i\n");  
     printf("  -dfa   force DFA matching for all subjects\n");  
     printf("  -i     show information about compiled pattern\n"  
            "  -m     output memory used information\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store (memory) used information\n"  
            "  -t     time compilation and execution\n");  
964      yield = 1;      yield = 1;
965      goto EXIT;      goto EXIT;
966      }      }
# Line 525  offsets = (int *)malloc(size_offsets_max Line 975  offsets = (int *)malloc(size_offsets_max
975  if (offsets == NULL)  if (offsets == NULL)
976    {    {
977    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
978      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
979    yield = 1;    yield = 1;
980    goto EXIT;    goto EXIT;
981    }    }
# Line 534  if (offsets == NULL) Line 984  if (offsets == NULL)
984    
985  if (argc > 1)  if (argc > 1)
986    {    {
987    infile = fopen(argv[op], "rb");    infile = fopen(argv[op], INPUT_MODE);
988    if (infile == NULL)    if (infile == NULL)
989      {      {
990      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
# Line 545  if (argc > 1) Line 995  if (argc > 1)
995    
996  if (argc > 2)  if (argc > 2)
997    {    {
998    outfile = fopen(argv[op+1], "wb");    outfile = fopen(argv[op+1], OUTPUT_MODE);
999    if (outfile == NULL)    if (outfile == NULL)
1000      {      {
1001      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
# Line 561  pcre_free = new_free; Line 1011  pcre_free = new_free;
1011  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
1012  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
1013    
1014  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1015    
1016  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1017    
1018  /* Main loop */  /* Main loop */
1019    
# Line 590  while (!done) Line 1040  while (!done)
1040    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1041    int do_showrest = 0;    int do_showrest = 0;
1042    int do_flip = 0;    int do_flip = 0;
1043    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
1044    
1045    use_utf8 = 0;    use_utf8 = 0;
1046      debug_lengths = 1;
1047    
1048    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;  
1049    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1050    fflush(outfile);    fflush(outfile);
1051    
# Line 607  while (!done) Line 1057  while (!done)
1057    
1058    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1059      {      {
1060      unsigned long int magic;      unsigned long int magic, get_options;
1061      uschar sbuf[8];      uschar sbuf[8];
1062      FILE *f;      FILE *f;
1063    
# Line 655  while (!done) Line 1105  while (!done)
1105    
1106      /* Need to know if UTF-8 for printing data strings */      /* Need to know if UTF-8 for printing data strings */
1107    
1108      new_info(re, NULL, PCRE_INFO_OPTIONS, &options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1109      use_utf8 = (options & PCRE_UTF8) != 0;      use_utf8 = (get_options & PCRE_UTF8) != 0;
1110    
1111      /* Now see if there is any following study data */      /* Now see if there is any following study data */
1112    
# Line 695  while (!done) Line 1145  while (!done)
1145    
1146    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1147      {      {
1148      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1149      goto SKIP_DATA;      goto SKIP_DATA;
1150      }      }
1151    
1152    pp = p;    pp = p;
1153      poffset = p - buffer;
1154    
1155    for(;;)    for(;;)
1156      {      {
# Line 710  while (!done) Line 1161  while (!done)
1161        pp++;        pp++;
1162        }        }
1163      if (*pp != 0) break;      if (*pp != 0) break;
1164        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = BUFFER_SIZE - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
1165        {        {
1166        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1167        done = 1;        done = 1;
# Line 728  while (!done) Line 1170  while (!done)
1170      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1171      }      }
1172    
1173      /* The buffer may have moved while being extended; reset the start of data
1174      pointer to the correct relative point in the buffer. */
1175    
1176      p = buffer + poffset;
1177    
1178    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1179    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1180    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
# Line 759  while (!done) Line 1206  while (!done)
1206    
1207        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1208        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1209          case 'B': do_debug = 1; break;
1210        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
1211        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1212        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1213        case 'F': do_flip = 1; break;        case 'F': do_flip = 1; break;
1214        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1215        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1216          case 'J': options |= PCRE_DUPNAMES; break;
1217        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1218        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1219    
# Line 775  while (!done) Line 1224  while (!done)
1224        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1225        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1226        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1227          case 'Z': debug_lengths = 0; break;
1228        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1229        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1230    
1231        case 'L':        case 'L':
1232        ppp = pp;        ppp = pp;
1233        /* The '\r' test here is so that it works on Windows */        /* The '\r' test here is so that it works on Windows. */
1234        while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;        /* The '0' test is just in case this is an unterminated line. */
1235          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1236        *ppp = 0;        *ppp = 0;
1237        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1238          {          {
1239          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1240          goto SKIP_DATA;          goto SKIP_DATA;
1241          }          }
1242          locale_set = 1;
1243        tables = pcre_maketables();        tables = pcre_maketables();
1244        pp = ppp;        pp = ppp;
1245        break;        break;
# Line 799  while (!done) Line 1251  while (!done)
1251        *pp = 0;        *pp = 0;
1252        break;        break;
1253    
1254          case '<':
1255            {
1256            if (strncmp((char *)pp, "JS>", 3) == 0)
1257              {
1258              options |= PCRE_JAVASCRIPT_COMPAT;
1259              pp += 3;
1260              }
1261            else
1262              {
1263              int x = check_newline(pp, outfile);
1264              if (x == 0) goto SKIP_DATA;
1265              options |= x;
1266              while (*pp++ != '>');
1267              }
1268            }
1269          break;
1270    
1271        case '\r':                      /* So that it works in Windows */        case '\r':                      /* So that it works in Windows */
1272        case '\n':        case '\n':
1273        case ' ':        case ' ':
# Line 823  while (!done) Line 1292  while (!done)
1292      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1293      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1294      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1295        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1296        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1297    
1298      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1299    
1300      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 830  while (!done) Line 1302  while (!done)
1302    
1303      if (rc != 0)      if (rc != 0)
1304        {        {
1305        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1306        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1307        goto SKIP_DATA;        goto SKIP_DATA;
1308        }        }
# Line 842  while (!done) Line 1314  while (!done)
1314  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1315    
1316      {      {
1317      if (timeit)      if (timeit > 0)
1318        {        {
1319        register int i;        register int i;
1320        clock_t time_taken;        clock_t time_taken;
1321        clock_t start_time = clock();        clock_t start_time = clock();
1322        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1323          {          {
1324          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1325          if (re != NULL) free(re);          if (re != NULL) free(re);
1326          }          }
1327        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1328        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1329          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1330            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1331        }        }
1332    
# Line 871  while (!done) Line 1343  while (!done)
1343          {          {
1344          for (;;)          for (;;)
1345            {            {
1346            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1347              {              {
1348              done = 1;              done = 1;
1349              goto CONTINUE;              goto CONTINUE;
# Line 906  while (!done) Line 1378  while (!done)
1378    
1379      if (do_study)      if (do_study)
1380        {        {
1381        if (timeit)        if (timeit > 0)
1382          {          {
1383          register int i;          register int i;
1384          clock_t time_taken;          clock_t time_taken;
1385          clock_t start_time = clock();          clock_t start_time = clock();
1386          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
1387            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
1388          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1389          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
1390          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
1391            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeit) /
1392              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
1393          }          }
1394        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 934  while (!done) Line 1406  while (!done)
1406      if (do_flip)      if (do_flip)
1407        {        {
1408        real_pcre *rre = (real_pcre *)re;        real_pcre *rre = (real_pcre *)re;
1409        rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));        rre->magic_number =
1410            byteflip(rre->magic_number, sizeof(rre->magic_number));
1411        rre->size = byteflip(rre->size, sizeof(rre->size));        rre->size = byteflip(rre->size, sizeof(rre->size));
1412        rre->options = byteflip(rre->options, sizeof(rre->options));        rre->options = byteflip(rre->options, sizeof(rre->options));
1413        rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));        rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1414        rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));        rre->top_bracket =
1415        rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1416        rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));        rre->top_backref =
1417        rre->name_table_offset = byteflip(rre->name_table_offset,          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1418          rre->first_byte =
1419            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1420          rre->req_byte =
1421            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1422          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1423          sizeof(rre->name_table_offset));          sizeof(rre->name_table_offset));
1424        rre->name_entry_size = byteflip(rre->name_entry_size,        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1425          sizeof(rre->name_entry_size));          sizeof(rre->name_entry_size));
1426        rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));        rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1427            sizeof(rre->name_count));
1428    
1429        if (extra != NULL)        if (extra != NULL)
1430          {          {
# Line 959  while (!done) Line 1438  while (!done)
1438    
1439      SHOW_INFO:      SHOW_INFO:
1440    
1441        if (do_debug)
1442          {
1443          fprintf(outfile, "------------------------------------------------------------------\n");
1444          pcre_printint(re, outfile, debug_lengths);
1445          }
1446    
1447      if (do_showinfo)      if (do_showinfo)
1448        {        {
1449        unsigned long int get_options, all_options;        unsigned long int get_options, all_options;
1450    #if !defined NOINFOCHECK
1451        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1452        int count, backrefmax, first_char, need_char;  #endif
1453          int count, backrefmax, first_char, need_char, okpartial, jchanged,
1454            hascrorlf;
1455        int nameentrysize, namecount;        int nameentrysize, namecount;
1456        const uschar *nametable;        const uschar *nametable;
1457    
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         _pcre_printint(re, outfile);  
         }  
   
1458        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1459        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1460        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
# Line 982  while (!done) Line 1464  while (!done)
1464        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1465        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1466        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1467          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1468          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1469          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1470    
1471    #if !defined NOINFOCHECK
1472        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1473        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1474          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 1000  while (!done) Line 1486  while (!done)
1486            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1487              get_options, old_options);              get_options, old_options);
1488          }          }
1489    #endif
1490    
1491        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1492          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 1021  while (!done) Line 1508  while (!done)
1508            }            }
1509          }          }
1510    
1511        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1512        to fish it out via out back door */        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1513    
1514        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1515        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
         }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
1516    
1517        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1518          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1519            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1520            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1521            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1522            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1523            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1524            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1525              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1526              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1527            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1528            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1529            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1530              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1531            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1532            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1533              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1534    
1535        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1536          fprintf(outfile, "Case state changes\n");  
1537          switch (get_options & PCRE_NEWLINE_BITS)
1538            {
1539            case PCRE_NEWLINE_CR:
1540            fprintf(outfile, "Forced newline sequence: CR\n");
1541            break;
1542    
1543            case PCRE_NEWLINE_LF:
1544            fprintf(outfile, "Forced newline sequence: LF\n");
1545            break;
1546    
1547            case PCRE_NEWLINE_CRLF:
1548            fprintf(outfile, "Forced newline sequence: CRLF\n");
1549            break;
1550    
1551            case PCRE_NEWLINE_ANYCRLF:
1552            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1553            break;
1554    
1555            case PCRE_NEWLINE_ANY:
1556            fprintf(outfile, "Forced newline sequence: ANY\n");
1557            break;
1558    
1559            default:
1560            break;
1561            }
1562    
1563        if (first_char == -1)        if (first_char == -1)
1564          {          {
1565          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1566          }          }
1567        else if (first_char < 0)        else if (first_char < 0)
1568          {          {
# Line 1063  while (!done) Line 1573  while (!done)
1573          int ch = first_char & 255;          int ch = first_char & 255;
1574          const char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1575            "" : " (caseless)";            "" : " (caseless)";
1576          if (isprint(ch))          if (PRINTHEX(ch))
1577            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1578          else          else
1579            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 1078  while (!done) Line 1588  while (!done)
1588          int ch = need_char & 255;          int ch = need_char & 255;
1589          const char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1590            "" : " (caseless)";            "" : " (caseless)";
1591          if (isprint(ch))          if (PRINTHEX(ch))
1592            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1593          else          else
1594            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
# Line 1114  while (!done) Line 1624  while (!done)
1624                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
1625                    c = 2;                    c = 2;
1626                    }                    }
1627                  if (isprint(i) && i != ' ')                  if (PRINTHEX(i) && i != ' ')
1628                    {                    {
1629                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
1630                    c += 2;                    c += 2;
# Line 1146  while (!done) Line 1656  while (!done)
1656        else        else
1657          {          {
1658          uschar sbuf[8];          uschar sbuf[8];
1659          sbuf[0] = (true_size >> 24)  & 255;          sbuf[0] = (uschar)((true_size >> 24) & 255);
1660          sbuf[1] = (true_size >> 16)  & 255;          sbuf[1] = (uschar)((true_size >> 16) & 255);
1661          sbuf[2] = (true_size >>  8)  & 255;          sbuf[2] = (uschar)((true_size >>  8) & 255);
1662          sbuf[3] = (true_size)  & 255;          sbuf[3] = (uschar)((true_size) & 255);
1663    
1664          sbuf[4] = (true_study_size >> 24)  & 255;          sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1665          sbuf[5] = (true_study_size >> 16)  & 255;          sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1666          sbuf[6] = (true_study_size >>  8)  & 255;          sbuf[6] = (uschar)((true_study_size >>  8) & 255);
1667          sbuf[7] = (true_study_size)  & 255;          sbuf[7] = (uschar)((true_study_size) & 255);
1668    
1669          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
1670              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1173  while (!done) Line 1683  while (!done)
1683                  strerror(errno));                  strerror(errno));
1684                }                }
1685              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
1686    
1687              }              }
1688            }            }
1689          fclose(f);          fclose(f);
# Line 1189  while (!done) Line 1700  while (!done)
1700    
1701    for (;;)    for (;;)
1702      {      {
1703      unsigned char *q;      uschar *q;
1704      unsigned char *bptr = dbuffer;      uschar *bptr;
1705      int *use_offsets = offsets;      int *use_offsets = offsets;
1706      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1707      int callout_data = 0;      int callout_data = 0;
# Line 1207  while (!done) Line 1718  while (!done)
1718    
1719      options = 0;      options = 0;
1720    
1721        *copynames = 0;
1722        *getnames = 0;
1723    
1724        copynamesptr = copynames;
1725        getnamesptr = getnames;
1726    
1727      pcre_callout = callout;      pcre_callout = callout;
1728      first_callout = 1;      first_callout = 1;
1729      callout_extra = 0;      callout_extra = 0;
# Line 1215  while (!done) Line 1732  while (!done)
1732      callout_fail_id = -1;      callout_fail_id = -1;
1733      show_malloc = 0;      show_malloc = 0;
1734    
1735      if (infile == stdin) printf("data> ");      if (extra != NULL) extra->flags &=
1736      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1737    
1738        len = 0;
1739        for (;;)
1740        {        {
1741        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1742        goto CONTINUE;          {
1743            if (len > 0) break;
1744            done = 1;
1745            goto CONTINUE;
1746            }
1747          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1748          len = (int)strlen((char *)buffer);
1749          if (buffer[len-1] == '\n') break;
1750        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1751    
     len = (int)strlen((char *)buffer);  
1752      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1753      buffer[len] = 0;      buffer[len] = 0;
1754      if (len == 0) break;      if (len == 0) break;
# Line 1231  while (!done) Line 1756  while (!done)
1756      p = buffer;      p = buffer;
1757      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1758    
1759      q = dbuffer;      bptr = q = dbuffer;
1760      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1761        {        {
1762        int i = 0;        int i = 0;
# Line 1253  while (!done) Line 1778  while (!done)
1778          c -= '0';          c -= '0';
1779          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1780            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1781    
1782    #if !defined NOUTF8
1783            if (use_utf8 && c > 255)
1784              {
1785              unsigned char buff8[8];
1786              int ii, utn;
1787              utn = ord2utf8(c, buff8);
1788              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1789              c = buff8[ii];   /* Last byte */
1790              }
1791    #endif
1792          break;          break;
1793    
1794          case 'x':          case 'x':
1795    
1796          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
1797    
1798    #if !defined NOUTF8
1799          if (*p == '{')          if (*p == '{')
1800            {            {
1801            unsigned char *pt = p;            unsigned char *pt = p;
# Line 1269  while (!done) Line 1806  while (!done)
1806              {              {
1807              unsigned char buff8[8];              unsigned char buff8[8];
1808              int ii, utn;              int ii, utn;
1809              utn = _pcre_ord2utf8(c, buff8);              if (use_utf8)
1810              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                {
1811              c = buff8[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
1812                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1813                  c = buff8[ii];   /* Last byte */
1814                  }
1815                else
1816                 {
1817                 if (c > 255)
1818                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1819                     "UTF-8 mode is not enabled.\n"
1820                     "** Truncation will probably give the wrong result.\n", c);
1821                 }
1822              p = pt + 1;              p = pt + 1;
1823              break;              break;
1824              }              }
1825            /* Not correct form; fall through */            /* Not correct form; fall through */
1826            }            }
1827    #endif
1828    
1829          /* Ordinary \x */          /* Ordinary \x */
1830    
# Line 1312  while (!done) Line 1860  while (!done)
1860            }            }
1861          else if (isalnum(*p))          else if (isalnum(*p))
1862            {            {
1863            uschar name[256];            uschar *npp = copynamesptr;
           uschar *npp = name;  
1864            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1865              *npp++ = 0;
1866            *npp = 0;            *npp = 0;
1867            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
1868            if (n < 0)            if (n < 0)
1869              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1870            else copystrings |= 1 << n;            copynamesptr = npp;
1871            }            }
1872          else if (*p == '+')          else if (*p == '+')
1873            {            {
# Line 1357  while (!done) Line 1905  while (!done)
1905            }            }
1906          continue;          continue;
1907    
1908    #if !defined NODFA
1909          case 'D':          case 'D':
1910    #if !defined NOPOSIX
1911          if (posix || do_posix)          if (posix || do_posix)
1912            printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");            printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1913          else          else
1914    #endif
1915            use_dfa = 1;            use_dfa = 1;
1916          continue;          continue;
1917    
1918          case 'F':          case 'F':
1919          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
1920          continue;          continue;
1921    #endif
1922    
1923          case 'G':          case 'G':
1924          if (isdigit(*p))          if (isdigit(*p))
# Line 1376  while (!done) Line 1928  while (!done)
1928            }            }
1929          else if (isalnum(*p))          else if (isalnum(*p))
1930            {            {
1931            uschar name[256];            uschar *npp = getnamesptr;
           uschar *npp = name;  
1932            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1933              *npp++ = 0;
1934            *npp = 0;            *npp = 0;
1935            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
1936            if (n < 0)            if (n < 0)
1937              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1938            else getstrings |= 1 << n;            getnamesptr = npp;
1939            }            }
1940          continue;          continue;
1941    
# Line 1409  while (!done) Line 1961  while (!done)
1961            if (offsets == NULL)            if (offsets == NULL)
1962              {              {
1963              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1964                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1965              yield = 1;              yield = 1;
1966              goto EXIT;              goto EXIT;
1967              }              }
# Line 1422  while (!done) Line 1974  while (!done)
1974          options |= PCRE_PARTIAL;          options |= PCRE_PARTIAL;
1975          continue;          continue;
1976    
1977            case 'Q':
1978            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1979            if (extra == NULL)
1980              {
1981              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1982              extra->flags = 0;
1983              }
1984            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1985            extra->match_limit_recursion = n;
1986            continue;
1987    
1988            case 'q':
1989            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1990            if (extra == NULL)
1991              {
1992              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1993              extra->flags = 0;
1994              }
1995            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1996            extra->match_limit = n;
1997            continue;
1998    
1999    #if !defined NODFA
2000          case 'R':          case 'R':
2001          options |= PCRE_DFA_RESTART;          options |= PCRE_DFA_RESTART;
2002          continue;          continue;
2003    #endif
2004    
2005          case 'S':          case 'S':
2006          show_malloc = 1;          show_malloc = 1;
# Line 1437  while (!done) Line 2013  while (!done)
2013          case '?':          case '?':
2014          options |= PCRE_NO_UTF8_CHECK;          options |= PCRE_NO_UTF8_CHECK;
2015          continue;          continue;
2016    
2017            case '<':
2018              {
2019              int x = check_newline(p, outfile);
2020              if (x == 0) goto NEXT_DATA;
2021              options |= x;
2022              while (*p++ != '>');
2023              }
2024            continue;
2025          }          }
2026        *q++ = c;        *q++ = c;
2027        }        }
# Line 1467  while (!done) Line 2052  while (!done)
2052    
2053        if (rc != 0)        if (rc != 0)
2054          {          {
2055          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2056          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2057          }          }
2058          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2059                  != 0)
2060            {
2061            fprintf(outfile, "Matched with REG_NOSUB\n");
2062            }
2063        else        else
2064          {          {
2065          size_t i;          size_t i;
# Line 1501  while (!done) Line 2091  while (!done)
2091    
2092      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2093        {        {
2094        if (timeit)        if (timeitm > 0)
2095          {          {
2096          register int i;          register int i;
2097          clock_t time_taken;          clock_t time_taken;
2098          clock_t start_time = clock();          clock_t start_time = clock();
2099    
2100    #if !defined NODFA
2101          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
2102            {            {
2103            int workspace[1000];            int workspace[1000];
2104            for (i = 0; i < LOOPREPEAT; i++)            for (i = 0; i < timeitm; i++)
2105              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2106                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
2107                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
2108            }            }
2109          else          else
2110    #endif
2111    
2112          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeitm; i++)
2113            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2114              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2115    
2116          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2117          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2118            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
2119              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2120          }          }
2121    
2122        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
2123        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
2124          for the recursion limit. */
2125    
2126        if (find_match_limit)        if (find_match_limit)
2127          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
2128          if (extra == NULL)          if (extra == NULL)
2129            {            {
2130            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2131            extra->flags = 0;            extra->flags = 0;
2132            }            }
         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;  
2133    
2134          for (;;)          (void)check_match_limit(re, extra, bptr, len, start_offset,
2135            {            options|g_notempty, use_offsets, use_size_offsets,
2136            extra->match_limit = mid;            PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2137            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,            PCRE_ERROR_MATCHLIMIT, "match()");
2138              options | g_notempty, use_offsets, use_size_offsets);  
2139            if (count == PCRE_ERROR_MATCHLIMIT)          count = check_match_limit(re, extra, bptr, len, start_offset,
2140              {            options|g_notempty, use_offsets, use_size_offsets,
2141              /* fprintf(outfile, "Testing match limit = %d\n", mid); */            PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2142              min = mid;            PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||  
                                  count == PCRE_ERROR_PARTIAL)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
   
         extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;  
2143          }          }
2144    
2145        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1591  while (!done) Line 2161  while (!done)
2161        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
2162        value of match_limit. */        value of match_limit. */
2163    
2164    #if !defined NODFA
2165        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
2166          {          {
2167          int workspace[1000];          int workspace[1000];
# Line 1603  while (!done) Line 2174  while (!done)
2174            count = use_size_offsets/2;            count = use_size_offsets/2;
2175            }            }
2176          }          }
2177    #endif
2178    
2179        else        else
2180          {          {
# Line 1619  while (!done) Line 2191  while (!done)
2191    
2192        if (count >= 0)        if (count >= 0)
2193          {          {
2194          int i;          int i, maxcount;
2195    
2196    #if !defined NODFA
2197            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2198    #endif
2199              maxcount = use_size_offsets/3;
2200    
2201            /* This is a check against a lunatic return value. */
2202    
2203            if (count > maxcount)
2204              {
2205              fprintf(outfile,
2206                "** PCRE error: returned count %d is too big for offset size %d\n",
2207                count, use_size_offsets);
2208              count = use_size_offsets/3;
2209              if (do_g || do_G)
2210                {
2211                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2212                do_g = do_G = FALSE;        /* Break g/G loop */
2213                }
2214              }
2215    
2216          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2217            {            {
2218            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 1647  while (!done) Line 2240  while (!done)
2240            {            {
2241            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2242              {              {
2243              char copybuffer[16];              char copybuffer[256];
2244              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2245                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2246              if (rc < 0)              if (rc < 0)
# Line 1657  while (!done) Line 2250  while (!done)
2250              }              }
2251            }            }
2252    
2253            for (copynamesptr = copynames;
2254                 *copynamesptr != 0;
2255                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2256              {
2257              char copybuffer[256];
2258              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2259                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2260              if (rc < 0)
2261                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2262              else
2263                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2264              }
2265    
2266          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2267            {            {
2268            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1669  while (!done) Line 2275  while (!done)
2275              else              else
2276                {                {
2277                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2278                pcre_free_substring(substring);                pcre_free_substring(substring);
2279                }                }
2280              }              }
2281            }            }
2282    
2283            for (getnamesptr = getnames;
2284                 *getnamesptr != 0;
2285                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2286              {
2287              const char *substring;
2288              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2289                count, (char *)getnamesptr, &substring);
2290              if (rc < 0)
2291                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2292              else
2293                {
2294                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2295                pcre_free_substring(substring);
2296                }
2297              }
2298    
2299          if (getlist)          if (getlist)
2300            {            {
2301            const char **stringlist;            const char **stringlist;
# Line 1699  while (!done) Line 2320  while (!done)
2320        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
2321          {          {
2322          fprintf(outfile, "Partial match");          fprintf(outfile, "Partial match");
2323    #if !defined NODFA
2324          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2325            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2326              bptr + use_offsets[0]);              bptr + use_offsets[0]);
2327    #endif
2328          fprintf(outfile, "\n");          fprintf(outfile, "\n");
2329          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
2330          }          }
2331    
2332        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2333        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2334        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2335        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2336        offset values to achieve this. We won't be at the end of the string -  
2337        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any" or
2338          "anycrlf". If the previous match was at the end of a line terminated by
2339          CRLF, an advance of one character just passes the \r, whereas we should
2340          prefer the longer newline sequence, as does the code in pcre_exec().
2341          Fudge the offset value to achieve this.
2342    
2343          Otherwise, in the case of UTF-8 matching, the advance must be one
2344          character, not one byte. */
2345    
2346        else        else
2347          {          {
2348          if (g_notempty != 0)          if (g_notempty != 0)
2349            {            {
2350            int onechar = 1;            int onechar = 1;
2351              unsigned int obits = ((real_pcre *)re)->options;
2352            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2353            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2354                {
2355                int d;
2356                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2357                obits = (d == '\r')? PCRE_NEWLINE_CR :
2358                        (d == '\n')? PCRE_NEWLINE_LF :
2359                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2360                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2361                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2362                }
2363              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2364                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2365                  &&
2366                  start_offset < len - 1 &&
2367                  bptr[start_offset] == '\r' &&
2368                  bptr[start_offset+1] == '\n')
2369                onechar++;
2370              else if (use_utf8)
2371              {              {
2372              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2373                {                {
# Line 1754  while (!done) Line 2402  while (!done)
2402        character. */        character. */
2403    
2404        g_notempty = 0;        g_notempty = 0;
2405    
2406        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2407          {          {
2408          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
# Line 1772  while (!done) Line 2421  while (!done)
2421          len -= use_offsets[1];          len -= use_offsets[1];
2422          }          }
2423        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2424    
2425        NEXT_DATA: continue;
2426      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2427    
2428    CONTINUE:    CONTINUE:
# Line 1786  while (!done) Line 2437  while (!done)
2437      {      {
2438      new_free((void *)tables);      new_free((void *)tables);
2439      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2440        locale_set = 0;
2441      }      }
2442    }    }
2443    

Legend:
Removed from v.77  
changed lines
  Added in v.355

  ViewVC Help
Powered by ViewVC 1.1.5