/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 85 by nigel, Sat Feb 24 21:41:13 2007 UTC revision 376 by ph10, Sun Mar 1 12:00:59 2009 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 48  POSSIBILITY OF SUCH DAMAGE.
48  #include <locale.h>  #include <locale.h>
49  #include <errno.h>  #include <errno.h>
50    
51  #define PCRE_SPY        /* For Win32 build, import data, not export */  #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60    /* A number of things vary for Windows builds. Originally, pcretest opened its
61    input and output without "b"; then I was told that "b" was needed in some
62    environments, so it was added for release 5.0 to both the input and output. (It
63    makes no difference on Unix-like systems.) Later I was told that it is wrong
64    for the input on Windows. I've now abstracted the modes into two macros that
65    are set here, to make it easier to fiddle with them, and removed "b" from the
66    input mode under Windows. */
67    
68  /* We include pcre_internal.h because we need the internal info for displaying  #if defined(_WIN32) || defined(WIN32)
69  the results of pcre_study() and we also need to know about the internal  #include <io.h>                /* For _setmode() */
70  macros, structures, and other internal data values; pcretest has "inside  #include <fcntl.h>             /* For _O_BINARY */
71  information" compared to a program that strictly follows the PCRE API. */  #define INPUT_MODE   "r"
72    #define OUTPUT_MODE  "wb"
73    
74    #define isatty _isatty         /* This is what Windows calls them, I'm told */
75    #define fileno _fileno
76    
77    #else
78    #include <sys/time.h>          /* These two includes are needed */
79    #include <sys/resource.h>      /* for setrlimit(). */
80    #define INPUT_MODE   "rb"
81    #define OUTPUT_MODE  "wb"
82    #endif
83    
84    
85    /* We have to include pcre_internal.h because we need the internal info for
86    displaying the results of pcre_study() and we also need to know about the
87    internal macros, structures, and other internal data values; pcretest has
88    "inside information" compared to a program that strictly follows the PCRE API.
89    
90    Although pcre_internal.h does itself include pcre.h, we explicitly include it
91    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
92    appropriately for an application, not for building PCRE. */
93    
94    #include "pcre.h"
95  #include "pcre_internal.h"  #include "pcre_internal.h"
96    
97  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to some of the data tables that PCRE uses. So as not to have
98  two copies, we include the source file here, changing the names of the external  to keep two copies, we include the source file here, changing the names of the
99  symbols to prevent clashes. */  external symbols to prevent clashes. */
100    
101    #define _pcre_ucp_gentype      ucp_gentype
102  #define _pcre_utf8_table1      utf8_table1  #define _pcre_utf8_table1      utf8_table1
103  #define _pcre_utf8_table1_size utf8_table1_size  #define _pcre_utf8_table1_size utf8_table1_size
104  #define _pcre_utf8_table2      utf8_table2  #define _pcre_utf8_table2      utf8_table2
# Line 64  symbols to prevent clashes. */ Line 106  symbols to prevent clashes. */
106  #define _pcre_utf8_table4      utf8_table4  #define _pcre_utf8_table4      utf8_table4
107  #define _pcre_utt              utt  #define _pcre_utt              utt
108  #define _pcre_utt_size         utt_size  #define _pcre_utt_size         utt_size
109    #define _pcre_utt_names        utt_names
110  #define _pcre_OP_lengths       OP_lengths  #define _pcre_OP_lengths       OP_lengths
111    
112  #include "pcre_tables.c"  #include "pcre_tables.c"
113    
114  /* We also need the pcre_printint() function for printing out compiled  /* We also need the pcre_printint() function for printing out compiled
115  patterns. This function is in a separate file so that it can be included in  patterns. This function is in a separate file so that it can be included in
116  pcre_compile.c when that module is compiled with debugging enabled. */  pcre_compile.c when that module is compiled with debugging enabled.
117    
118    The definition of the macro PRINTABLE, which determines whether to print an
119    output character as-is or as a hex value when showing compiled patterns, is
120    contained in this file. We use it here also, in cases when the locale has not
121    been explicitly changed, so as to get consistent output from systems that
122    differ in their output from isprint() even in the "C" locale. */
123    
124  #include "pcre_printint.src"  #include "pcre_printint.src"
125    
126    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
127    
128    
129  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
130  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 83  Makefile. */ Line 134  Makefile. */
134  #include "pcreposix.h"  #include "pcreposix.h"
135  #endif  #endif
136    
137  /* It is also possible, for the benefit of the version imported into Exim, to  /* It is also possible, for the benefit of the version currently imported into
138  build pcretest without support for UTF8 (define NOUTF8), without the interface  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
139  to the DFA matcher (NODFA), and without the doublecheck of the old "info"  interface to the DFA matcher (NODFA), and without the doublecheck of the old
140  function (define NOINFOCHECK). */  "info" function (define NOINFOCHECK). In fact, we automatically cut out the
141    UTF8 support if PCRE is built without it. */
142    
143    #ifndef SUPPORT_UTF8
144    #ifndef NOUTF8
145    #define NOUTF8
146    #endif
147    #endif
148    
149    
150  /* Other parameters */  /* Other parameters */
# Line 99  function (define NOINFOCHECK). */ Line 157  function (define NOINFOCHECK). */
157  #endif  #endif
158  #endif  #endif
159    
160  #define LOOPREPEAT 500000  /* This is the default loop count for timing. */
   
 #define BUFFER_SIZE 30000  
 #define PBUFFER_SIZE BUFFER_SIZE  
 #define DBUFFER_SIZE BUFFER_SIZE  
161    
162    #define LOOPREPEAT 500000
163    
164  /* Static variables */  /* Static variables */
165    
# Line 114  static int callout_count; Line 169  static int callout_count;
169  static int callout_extra;  static int callout_extra;
170  static int callout_fail_count;  static int callout_fail_count;
171  static int callout_fail_id;  static int callout_fail_id;
172    static int debug_lengths;
173  static int first_callout;  static int first_callout;
174    static int locale_set = 0;
175  static int show_malloc;  static int show_malloc;
176  static int use_utf8;  static int use_utf8;
177  static size_t gotten_store;  static size_t gotten_store;
178    
179    /* The buffers grow automatically if very long input lines are encountered. */
180    
181    static int buffer_size = 50000;
182    static uschar *buffer = NULL;
183    static uschar *dbuffer = NULL;
184  static uschar *pbuffer = NULL;  static uschar *pbuffer = NULL;
185    
186    
187    
188  /*************************************************  /*************************************************
189    *        Read or extend an input line            *
190    *************************************************/
191    
192    /* Input lines are read into buffer, but both patterns and data lines can be
193    continued over multiple input lines. In addition, if the buffer fills up, we
194    want to automatically expand it so as to be able to handle extremely large
195    lines that are needed for certain stress tests. When the input buffer is
196    expanded, the other two buffers must also be expanded likewise, and the
197    contents of pbuffer, which are a copy of the input for callouts, must be
198    preserved (for when expansion happens for a data line). This is not the most
199    optimal way of handling this, but hey, this is just a test program!
200    
201    Arguments:
202      f            the file to read
203      start        where in buffer to start (this *must* be within buffer)
204      prompt       for stdin or readline()
205    
206    Returns:       pointer to the start of new data
207                   could be a copy of start, or could be moved
208                   NULL if no data read and EOF reached
209    */
210    
211    static uschar *
212    extend_inputline(FILE *f, uschar *start, const char *prompt)
213    {
214    uschar *here = start;
215    
216    for (;;)
217      {
218      int rlen = buffer_size - (here - buffer);
219    
220      if (rlen > 1000)
221        {
222        int dlen;
223    
224        /* If libreadline support is required, use readline() to read a line if the
225        input is a terminal. Note that readline() removes the trailing newline, so
226        we must put it back again, to be compatible with fgets(). */
227    
228    #ifdef SUPPORT_LIBREADLINE
229        if (isatty(fileno(f)))
230          {
231          size_t len;
232          char *s = readline(prompt);
233          if (s == NULL) return (here == start)? NULL : start;
234          len = strlen(s);
235          if (len > 0) add_history(s);
236          if (len > rlen - 1) len = rlen - 1;
237          memcpy(here, s, len);
238          here[len] = '\n';
239          here[len+1] = 0;
240          free(s);
241          }
242        else
243    #endif
244    
245        /* Read the next line by normal means, prompting if the file is stdin. */
246    
247          {
248          if (f == stdin) printf(prompt);
249          if (fgets((char *)here, rlen,  f) == NULL)
250            return (here == start)? NULL : start;
251          }
252    
253        dlen = (int)strlen((char *)here);
254        if (dlen > 0 && here[dlen - 1] == '\n') return start;
255        here += dlen;
256        }
257    
258      else
259        {
260        int new_buffer_size = 2*buffer_size;
261        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
262        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
263        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
264    
265        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
266          {
267          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
268          exit(1);
269          }
270    
271        memcpy(new_buffer, buffer, buffer_size);
272        memcpy(new_pbuffer, pbuffer, buffer_size);
273    
274        buffer_size = new_buffer_size;
275    
276        start = new_buffer + (start - buffer);
277        here = new_buffer + (here - buffer);
278    
279        free(buffer);
280        free(dbuffer);
281        free(pbuffer);
282    
283        buffer = new_buffer;
284        dbuffer = new_dbuffer;
285        pbuffer = new_pbuffer;
286        }
287      }
288    
289    return NULL;  /* Control never gets here */
290    }
291    
292    
293    
294    
295    
296    
297    
298    /*************************************************
299  *          Read number from string               *  *          Read number from string               *
300  *************************************************/  *************************************************/
301    
302  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
303  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
304  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
305    
306  Arguments:  Arguments:
307    str           string to be converted    str           string to be converted
# Line 159  return(result); Line 331  return(result);
331  and returns the value of the character.  and returns the value of the character.
332    
333  Argument:  Argument:
334    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
335    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
336    
337  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
338             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
339  */  */
340    
341  #if !defined NOUTF8  #if !defined NOUTF8
342    
343  static int  static int
344  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *utf8bytes, int *vptr)
345  {  {
346  int c = *buffer++;  int c = *utf8bytes++;
347  int d = c;  int d = c;
348  int i, j, s;  int i, j, s;
349    
# Line 191  d = (c & utf8_table3[i]) << s; Line 363  d = (c & utf8_table3[i]) << s;
363    
364  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
365    {    {
366    c = *buffer++;    c = *utf8bytes++;
367    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
368    s -= 6;    s -= 6;
369    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 222  and encodes it as a UTF-8 character in 0 Line 394  and encodes it as a UTF-8 character in 0
394    
395  Arguments:  Arguments:
396    cvalue     the character value    cvalue     the character value
397    buffer     pointer to buffer for result - at least 6 bytes long    utf8bytes  pointer to buffer for result - at least 6 bytes long
398    
399  Returns:     number of characters placed in the buffer  Returns:     number of characters placed in the buffer
400  */  */
401    
402    #if !defined NOUTF8
403    
404  static int  static int
405  ord2utf8(int cvalue, uschar *buffer)  ord2utf8(int cvalue, uschar *utf8bytes)
406  {  {
407  register int i, j;  register int i, j;
408  for (i = 0; i < utf8_table1_size; i++)  for (i = 0; i < utf8_table1_size; i++)
409    if (cvalue <= utf8_table1[i]) break;    if (cvalue <= utf8_table1[i]) break;
410  buffer += i;  utf8bytes += i;
411  for (j = i; j > 0; j--)  for (j = i; j > 0; j--)
412   {   {
413   *buffer-- = 0x80 | (cvalue & 0x3f);   *utf8bytes-- = 0x80 | (cvalue & 0x3f);
414   cvalue >>= 6;   cvalue >>= 6;
415   }   }
416  *buffer = utf8_table2[i] | cvalue;  *utf8bytes = utf8_table2[i] | cvalue;
417  return i + 1;  return i + 1;
418  }  }
419    
420    #endif
421    
422    
423    
424  /*************************************************  /*************************************************
# Line 269  while (length-- > 0) Line 445  while (length-- > 0)
445        {        {
446        length -= rc - 1;        length -= rc - 1;
447        p += rc;        p += rc;
448        if (c < 256 && isprint(c))        if (PRINTHEX(c))
449          {          {
450          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
451          yield++;          yield++;
452          }          }
453        else        else
454          {          {
455          int n;          int n = 4;
456          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
457          yield += n;          yield += (n <= 0x000000ff)? 2 :
458                     (n <= 0x00000fff)? 3 :
459                     (n <= 0x0000ffff)? 4 :
460                     (n <= 0x000fffff)? 5 : 6;
461          }          }
462        continue;        continue;
463        }        }
# Line 287  while (length-- > 0) Line 466  while (length-- > 0)
466    
467     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
468    
469    if (isprint(c = *(p++)))    c = *p++;
470      if (PRINTHEX(c))
471      {      {
472      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
473      yield++;      yield++;
# Line 461  if ((rc = pcre_fullinfo(re, study, optio Line 641  if ((rc = pcre_fullinfo(re, study, optio
641  *         Byte flipping function                 *  *         Byte flipping function                 *
642  *************************************************/  *************************************************/
643    
644  static long int  static unsigned long int
645  byteflip(long int value, int n)  byteflip(unsigned long int value, int n)
646  {  {
647  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
648  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
# Line 475  return ((value & 0x000000ff) << 24) | Line 655  return ((value & 0x000000ff) << 24) |
655    
656    
657  /*************************************************  /*************************************************
658    *        Check match or recursion limit          *
659    *************************************************/
660    
661    static int
662    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
663      int start_offset, int options, int *use_offsets, int use_size_offsets,
664      int flag, unsigned long int *limit, int errnumber, const char *msg)
665    {
666    int count;
667    int min = 0;
668    int mid = 64;
669    int max = -1;
670    
671    extra->flags |= flag;
672    
673    for (;;)
674      {
675      *limit = mid;
676    
677      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
678        use_offsets, use_size_offsets);
679    
680      if (count == errnumber)
681        {
682        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
683        min = mid;
684        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
685        }
686    
687      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
688                             count == PCRE_ERROR_PARTIAL)
689        {
690        if (mid == min + 1)
691          {
692          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
693          break;
694          }
695        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
696        max = mid;
697        mid = (min + mid)/2;
698        }
699      else break;    /* Some other error */
700      }
701    
702    extra->flags &= ~flag;
703    return count;
704    }
705    
706    
707    
708    /*************************************************
709    *         Case-independent strncmp() function    *
710    *************************************************/
711    
712    /*
713    Arguments:
714      s         first string
715      t         second string
716      n         number of characters to compare
717    
718    Returns:    < 0, = 0, or > 0, according to the comparison
719    */
720    
721    static int
722    strncmpic(uschar *s, uschar *t, int n)
723    {
724    while (n--)
725      {
726      int c = tolower(*s++) - tolower(*t++);
727      if (c) return c;
728      }
729    return 0;
730    }
731    
732    
733    
734    /*************************************************
735    *         Check newline indicator                *
736    *************************************************/
737    
738    /* This is used both at compile and run-time to check for <xxx> escapes, where
739    xxx is LF, CR, CRLF, ANYCRLF, ANY, BSR_ANYCRLF, or BSR_UNICODE. Print a message
740    and return 0 if there is no match.
741    
742    Arguments:
743      p           points after the leading '<'
744      f           file for error message
745    
746    Returns:      appropriate PCRE_NEWLINE_xxx or PCRE_BSR_xxx flag, or 0
747    */
748    
749    static int
750    check_newline(uschar *p, FILE *f)
751    {
752    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
753    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
754    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
755    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
756    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
757    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
758    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
759    fprintf(f, "Unknown newline type at: <%s\n", p);
760    return 0;
761    }
762    
763    
764    
765    /*************************************************
766    *             Usage function                     *
767    *************************************************/
768    
769    static void
770    usage(void)
771    {
772    printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
773    printf("Input and output default to stdin and stdout.\n");
774    #ifdef SUPPORT_LIBREADLINE
775    printf("If input is a terminal, readline() is used to read from it.\n");
776    #else
777    printf("This version of pcretest is not linked with readline().\n");
778    #endif
779    printf("\nOptions:\n");
780    printf("  -b       show compiled code (bytecode)\n");
781    printf("  -C       show PCRE compile-time options and exit\n");
782    printf("  -d       debug: show compiled code and information (-b and -i)\n");
783    #if !defined NODFA
784    printf("  -dfa     force DFA matching for all subjects\n");
785    #endif
786    printf("  -help    show usage information\n");
787    printf("  -i       show information about compiled patterns\n"
788           "  -m       output memory used information\n"
789           "  -o <n>   set size of offsets vector to <n>\n");
790    #if !defined NOPOSIX
791    printf("  -p       use POSIX interface\n");
792    #endif
793    printf("  -q       quiet: do not output PCRE version number at start\n");
794    printf("  -S <n>   set stack size to <n> megabytes\n");
795    printf("  -s       output store (memory) used information\n"
796           "  -t       time compilation and execution\n");
797    printf("  -t <n>   time compilation and execution, repeating <n> times\n");
798    printf("  -tm      time execution (matching) only\n");
799    printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
800    }
801    
802    
803    
804    /*************************************************
805  *                Main Program                    *  *                Main Program                    *
806  *************************************************/  *************************************************/
807    
# Line 489  int options = 0; Line 816  int options = 0;
816  int study_options = 0;  int study_options = 0;
817  int op = 1;  int op = 1;
818  int timeit = 0;  int timeit = 0;
819    int timeitm = 0;
820  int showinfo = 0;  int showinfo = 0;
821  int showstore = 0;  int showstore = 0;
822    int quiet = 0;
823  int size_offsets = 45;  int size_offsets = 45;
824  int size_offsets_max;  int size_offsets_max;
825  int *offsets = NULL;  int *offsets = NULL;
# Line 501  int debug = 0; Line 830  int debug = 0;
830  int done = 0;  int done = 0;
831  int all_use_dfa = 0;  int all_use_dfa = 0;
832  int yield = 0;  int yield = 0;
833    int stack_size;
834    
835    /* These vectors store, end-to-end, a list of captured substring names. Assume
836    that 1024 is plenty long enough for the few names we'll be testing. */
837    
838    uschar copynames[1024];
839    uschar getnames[1024];
840    
841  unsigned char *buffer;  uschar *copynamesptr;
842  unsigned char *dbuffer;  uschar *getnamesptr;
843    
844  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that Electric Fence will check their misuse
845  when I am debugging. */  when I am debugging. They grow automatically when very long lines are read. */
846    
847  buffer = (unsigned char *)malloc(BUFFER_SIZE);  buffer = (unsigned char *)malloc(buffer_size);
848  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);  dbuffer = (unsigned char *)malloc(buffer_size);
849  pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);  pbuffer = (unsigned char *)malloc(buffer_size);
   
 /* The outfile variable is static so that new_malloc can use it. The _setmode()  
 stuff is some magic that I don't understand, but which apparently does good  
 things in Windows. It's related to line terminations.  */  
850    
851  #if defined(_WIN32) || defined(WIN32)  /* The outfile variable is static so that new_malloc can use it. */
 _setmode( _fileno( stdout ), 0x8000 );  
 #endif  /* defined(_WIN32) || defined(WIN32) */  
852    
853  outfile = stdout;  outfile = stdout;
854    
855    /* The following  _setmode() stuff is some Windows magic that puts stdout into
856    binary mode, so its runtime library does not expand LF into CRLF on output. At
857    least, that's what I've been told: never having used Windows I take this on
858    trust. Originally it set 0x8000, but I was advised _O_BINARY was better. */
859    
860    #if defined(_WIN32) || defined(WIN32)
861    _setmode( _fileno( stdout ), _O_BINARY );
862    #endif
863    
864  /* Scan options */  /* Scan options */
865    
866  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
# Line 530  while (argc > 1 && argv[op][0] == '-') Line 869  while (argc > 1 && argv[op][0] == '-')
869    
870    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
871      showstore = 1;      showstore = 1;
872    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
873      else if (strcmp(argv[op], "-b") == 0) debug = 1;
874    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
875    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
876  #if !defined NODFA  #if !defined NODFA
# Line 543  while (argc > 1 && argv[op][0] == '-') Line 883  while (argc > 1 && argv[op][0] == '-')
883      op++;      op++;
884      argc--;      argc--;
885      }      }
886      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
887        {
888        int both = argv[op][2] == 0;
889        int temp;
890        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
891                         *endptr == 0))
892          {
893          timeitm = temp;
894          op++;
895          argc--;
896          }
897        else timeitm = LOOPREPEAT;
898        if (both) timeit = timeitm;
899        }
900      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
901          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
902            *endptr == 0))
903        {
904    #if defined(_WIN32) || defined(WIN32)
905        printf("PCRE: -S not supported on this OS\n");
906        exit(1);
907    #else
908        int rc;
909        struct rlimit rlim;
910        getrlimit(RLIMIT_STACK, &rlim);
911        rlim.rlim_cur = stack_size * 1024 * 1024;
912        rc = setrlimit(RLIMIT_STACK, &rlim);
913        if (rc != 0)
914          {
915        printf("PCRE: setrlimit() failed with error %d\n", rc);
916        exit(1);
917          }
918        op++;
919        argc--;
920    #endif
921        }
922  #if !defined NOPOSIX  #if !defined NOPOSIX
923    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
924  #endif  #endif
925    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
926      {      {
927      int rc;      int rc;
928        unsigned long int lrc;
929      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
930      printf("Compiled with\n");      printf("Compiled with\n");
931      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
# Line 556  while (argc > 1 && argv[op][0] == '-') Line 933  while (argc > 1 && argv[op][0] == '-')
933      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
934      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
935      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
936      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
937          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
938          (rc == -2)? "ANYCRLF" :
939          (rc == -1)? "ANY" : "???");
940        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
941        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
942                                         "all Unicode newlines");
943      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
944      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
945      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
946      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
947      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
948      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
949        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
950        printf("  Default recursion depth limit = %ld\n", lrc);
951      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
952      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
953      exit(0);      goto EXIT;
954        }
955      else if (strcmp(argv[op], "-help") == 0 ||
956               strcmp(argv[op], "--help") == 0)
957        {
958        usage();
959        goto EXIT;
960      }      }
961    else    else
962      {      {
963      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
964      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
     printf("  -C     show PCRE compile-time options and exit\n");  
     printf("  -d     debug: show compiled code; implies -i\n");  
 #if !defined NODFA  
     printf("  -dfa   force DFA matching for all subjects\n");  
 #endif  
     printf("  -i     show information about compiled pattern\n"  
            "  -m     output memory used information\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store (memory) used information\n"  
            "  -t     time compilation and execution\n");  
965      yield = 1;      yield = 1;
966      goto EXIT;      goto EXIT;
967      }      }
# Line 598  offsets = (int *)malloc(size_offsets_max Line 976  offsets = (int *)malloc(size_offsets_max
976  if (offsets == NULL)  if (offsets == NULL)
977    {    {
978    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
979      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
980    yield = 1;    yield = 1;
981    goto EXIT;    goto EXIT;
982    }    }
# Line 607  if (offsets == NULL) Line 985  if (offsets == NULL)
985    
986  if (argc > 1)  if (argc > 1)
987    {    {
988    infile = fopen(argv[op], "rb");    infile = fopen(argv[op], INPUT_MODE);
989    if (infile == NULL)    if (infile == NULL)
990      {      {
991      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
# Line 618  if (argc > 1) Line 996  if (argc > 1)
996    
997  if (argc > 2)  if (argc > 2)
998    {    {
999    outfile = fopen(argv[op+1], "wb");    outfile = fopen(argv[op+1], OUTPUT_MODE);
1000    if (outfile == NULL)    if (outfile == NULL)
1001      {      {
1002      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
# Line 634  pcre_free = new_free; Line 1012  pcre_free = new_free;
1012  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
1013  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
1014    
1015  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1016    
1017  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1018    
1019  /* Main loop */  /* Main loop */
1020    
# Line 663  while (!done) Line 1041  while (!done)
1041    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1042    int do_showrest = 0;    int do_showrest = 0;
1043    int do_flip = 0;    int do_flip = 0;
1044    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
1045    
1046    use_utf8 = 0;    use_utf8 = 0;
1047      debug_lengths = 1;
1048    
1049    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;  
1050    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1051    fflush(outfile);    fflush(outfile);
1052    
# Line 680  while (!done) Line 1058  while (!done)
1058    
1059    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1060      {      {
1061      unsigned long int magic;      unsigned long int magic, get_options;
1062      uschar sbuf[8];      uschar sbuf[8];
1063      FILE *f;      FILE *f;
1064    
# Line 728  while (!done) Line 1106  while (!done)
1106    
1107      /* Need to know if UTF-8 for printing data strings */      /* Need to know if UTF-8 for printing data strings */
1108    
1109      new_info(re, NULL, PCRE_INFO_OPTIONS, &options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1110      use_utf8 = (options & PCRE_UTF8) != 0;      use_utf8 = (get_options & PCRE_UTF8) != 0;
1111    
1112      /* Now see if there is any following study data */      /* Now see if there is any following study data */
1113    
# Line 768  while (!done) Line 1146  while (!done)
1146    
1147    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1148      {      {
1149      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1150      goto SKIP_DATA;      goto SKIP_DATA;
1151      }      }
1152    
1153    pp = p;    pp = p;
1154      poffset = p - buffer;
1155    
1156    for(;;)    for(;;)
1157      {      {
# Line 783  while (!done) Line 1162  while (!done)
1162        pp++;        pp++;
1163        }        }
1164      if (*pp != 0) break;      if (*pp != 0) break;
1165        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = BUFFER_SIZE - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
1166        {        {
1167        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1168        done = 1;        done = 1;
# Line 801  while (!done) Line 1171  while (!done)
1171      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1172      }      }
1173    
1174      /* The buffer may have moved while being extended; reset the start of data
1175      pointer to the correct relative point in the buffer. */
1176    
1177      p = buffer + poffset;
1178    
1179    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1180    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1181    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
# Line 832  while (!done) Line 1207  while (!done)
1207    
1208        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1209        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1210          case 'B': do_debug = 1; break;
1211        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
1212        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1213        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1214        case 'F': do_flip = 1; break;        case 'F': do_flip = 1; break;
1215        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1216        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1217          case 'J': options |= PCRE_DUPNAMES; break;
1218        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1219        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1220    
# Line 848  while (!done) Line 1225  while (!done)
1225        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1226        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1227        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1228          case 'Z': debug_lengths = 0; break;
1229        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1230        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1231    
1232        case 'L':        case 'L':
1233        ppp = pp;        ppp = pp;
1234        /* The '\r' test here is so that it works on Windows */        /* The '\r' test here is so that it works on Windows. */
1235        while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;        /* The 0 (NUL) test is just in case this is an unterminated line. */
1236          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1237        *ppp = 0;        *ppp = 0;
1238        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1239          {          {
1240          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1241          goto SKIP_DATA;          goto SKIP_DATA;
1242          }          }
1243          locale_set = 1;
1244        tables = pcre_maketables();        tables = pcre_maketables();
1245        pp = ppp;        pp = ppp;
1246        break;        break;
# Line 872  while (!done) Line 1252  while (!done)
1252        *pp = 0;        *pp = 0;
1253        break;        break;
1254    
1255          case '<':
1256            {
1257            if (strncmp((char *)pp, "JS>", 3) == 0)
1258              {
1259              options |= PCRE_JAVASCRIPT_COMPAT;
1260              pp += 3;
1261              }
1262            else
1263              {
1264              int x = check_newline(pp, outfile);
1265              if (x == 0) goto SKIP_DATA;
1266              options |= x;
1267              while (*pp++ != '>');
1268              }
1269            }
1270          break;
1271    
1272        case '\r':                      /* So that it works in Windows */        case '\r':                      /* So that it works in Windows */
1273        case '\n':        case '\n':
1274        case ' ':        case ' ':
# Line 896  while (!done) Line 1293  while (!done)
1293      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1294      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1295      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1296        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1297        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1298    
1299      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1300    
1301      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 903  while (!done) Line 1303  while (!done)
1303    
1304      if (rc != 0)      if (rc != 0)
1305        {        {
1306        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1307        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1308        goto SKIP_DATA;        goto SKIP_DATA;
1309        }        }
# Line 915  while (!done) Line 1315  while (!done)
1315  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1316    
1317      {      {
1318      if (timeit)      if (timeit > 0)
1319        {        {
1320        register int i;        register int i;
1321        clock_t time_taken;        clock_t time_taken;
1322        clock_t start_time = clock();        clock_t start_time = clock();
1323        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1324          {          {
1325          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1326          if (re != NULL) free(re);          if (re != NULL) free(re);
1327          }          }
1328        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1329        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1330          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1331            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1332        }        }
1333    
# Line 944  while (!done) Line 1344  while (!done)
1344          {          {
1345          for (;;)          for (;;)
1346            {            {
1347            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1348              {              {
1349              done = 1;              done = 1;
1350              goto CONTINUE;              goto CONTINUE;
# Line 979  while (!done) Line 1379  while (!done)
1379    
1380      if (do_study)      if (do_study)
1381        {        {
1382        if (timeit)        if (timeit > 0)
1383          {          {
1384          register int i;          register int i;
1385          clock_t time_taken;          clock_t time_taken;
1386          clock_t start_time = clock();          clock_t start_time = clock();
1387          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
1388            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
1389          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1390          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
1391          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
1392            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeit) /
1393              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
1394          }          }
1395        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 1007  while (!done) Line 1407  while (!done)
1407      if (do_flip)      if (do_flip)
1408        {        {
1409        real_pcre *rre = (real_pcre *)re;        real_pcre *rre = (real_pcre *)re;
1410        rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));        rre->magic_number =
1411            byteflip(rre->magic_number, sizeof(rre->magic_number));
1412        rre->size = byteflip(rre->size, sizeof(rre->size));        rre->size = byteflip(rre->size, sizeof(rre->size));
1413        rre->options = byteflip(rre->options, sizeof(rre->options));        rre->options = byteflip(rre->options, sizeof(rre->options));
1414        rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));        rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1415        rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));        rre->top_bracket =
1416        rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1417        rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));        rre->top_backref =
1418        rre->name_table_offset = byteflip(rre->name_table_offset,          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1419          rre->first_byte =
1420            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1421          rre->req_byte =
1422            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1423          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1424          sizeof(rre->name_table_offset));          sizeof(rre->name_table_offset));
1425        rre->name_entry_size = byteflip(rre->name_entry_size,        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1426          sizeof(rre->name_entry_size));          sizeof(rre->name_entry_size));
1427        rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));        rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1428            sizeof(rre->name_count));
1429    
1430        if (extra != NULL)        if (extra != NULL)
1431          {          {
# Line 1032  while (!done) Line 1439  while (!done)
1439    
1440      SHOW_INFO:      SHOW_INFO:
1441    
1442        if (do_debug)
1443          {
1444          fprintf(outfile, "------------------------------------------------------------------\n");
1445          pcre_printint(re, outfile, debug_lengths);
1446          }
1447    
1448      if (do_showinfo)      if (do_showinfo)
1449        {        {
1450        unsigned long int get_options, all_options;        unsigned long int get_options, all_options;
1451  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1452        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1453  #endif  #endif
1454        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char, okpartial, jchanged,
1455            hascrorlf;
1456        int nameentrysize, namecount;        int nameentrysize, namecount;
1457        const uschar *nametable;        const uschar *nametable;
1458    
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         pcre_printint(re, outfile);  
         }  
   
1459        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1460        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1461        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
# Line 1057  while (!done) Line 1465  while (!done)
1465        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1466        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1467        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1468          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1469          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1470          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1471    
1472  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1473        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
# Line 1098  while (!done) Line 1509  while (!done)
1509            }            }
1510          }          }
1511    
1512        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1513        to fish it out via our back door */        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1514    
1515        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1516        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
         }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
1517    
1518        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1519          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1520            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1521            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1522            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1523            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1524            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1525            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1526              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1527              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1528            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1529            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1530            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1531              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1532            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1533            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1534              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1535    
1536          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1537    
1538          switch (get_options & PCRE_NEWLINE_BITS)
1539            {
1540            case PCRE_NEWLINE_CR:
1541            fprintf(outfile, "Forced newline sequence: CR\n");
1542            break;
1543    
1544            case PCRE_NEWLINE_LF:
1545            fprintf(outfile, "Forced newline sequence: LF\n");
1546            break;
1547    
1548            case PCRE_NEWLINE_CRLF:
1549            fprintf(outfile, "Forced newline sequence: CRLF\n");
1550            break;
1551    
1552            case PCRE_NEWLINE_ANYCRLF:
1553            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1554            break;
1555    
1556        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)          case PCRE_NEWLINE_ANY:
1557          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Forced newline sequence: ANY\n");
1558            break;
1559    
1560            default:
1561            break;
1562            }
1563    
1564        if (first_char == -1)        if (first_char == -1)
1565          {          {
1566          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1567          }          }
1568        else if (first_char < 0)        else if (first_char < 0)
1569          {          {
# Line 1140  while (!done) Line 1574  while (!done)
1574          int ch = first_char & 255;          int ch = first_char & 255;
1575          const char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1576            "" : " (caseless)";            "" : " (caseless)";
1577          if (isprint(ch))          if (PRINTHEX(ch))
1578            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1579          else          else
1580            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 1155  while (!done) Line 1589  while (!done)
1589          int ch = need_char & 255;          int ch = need_char & 255;
1590          const char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1591            "" : " (caseless)";            "" : " (caseless)";
1592          if (isprint(ch))          if (PRINTHEX(ch))
1593            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1594          else          else
1595            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
# Line 1191  while (!done) Line 1625  while (!done)
1625                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
1626                    c = 2;                    c = 2;
1627                    }                    }
1628                  if (isprint(i) && i != ' ')                  if (PRINTHEX(i) && i != ' ')
1629                    {                    {
1630                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
1631                    c += 2;                    c += 2;
# Line 1223  while (!done) Line 1657  while (!done)
1657        else        else
1658          {          {
1659          uschar sbuf[8];          uschar sbuf[8];
1660          sbuf[0] = (true_size >> 24)  & 255;          sbuf[0] = (uschar)((true_size >> 24) & 255);
1661          sbuf[1] = (true_size >> 16)  & 255;          sbuf[1] = (uschar)((true_size >> 16) & 255);
1662          sbuf[2] = (true_size >>  8)  & 255;          sbuf[2] = (uschar)((true_size >>  8) & 255);
1663          sbuf[3] = (true_size)  & 255;          sbuf[3] = (uschar)((true_size) & 255);
1664    
1665          sbuf[4] = (true_study_size >> 24)  & 255;          sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1666          sbuf[5] = (true_study_size >> 16)  & 255;          sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1667          sbuf[6] = (true_study_size >>  8)  & 255;          sbuf[6] = (uschar)((true_study_size >>  8) & 255);
1668          sbuf[7] = (true_study_size)  & 255;          sbuf[7] = (uschar)((true_study_size) & 255);
1669    
1670          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
1671              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1250  while (!done) Line 1684  while (!done)
1684                  strerror(errno));                  strerror(errno));
1685                }                }
1686              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
1687    
1688              }              }
1689            }            }
1690          fclose(f);          fclose(f);
# Line 1266  while (!done) Line 1701  while (!done)
1701    
1702    for (;;)    for (;;)
1703      {      {
1704      unsigned char *q;      uschar *q;
1705      unsigned char *bptr = dbuffer;      uschar *bptr;
1706      int *use_offsets = offsets;      int *use_offsets = offsets;
1707      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1708      int callout_data = 0;      int callout_data = 0;
# Line 1284  while (!done) Line 1719  while (!done)
1719    
1720      options = 0;      options = 0;
1721    
1722        *copynames = 0;
1723        *getnames = 0;
1724    
1725        copynamesptr = copynames;
1726        getnamesptr = getnames;
1727    
1728      pcre_callout = callout;      pcre_callout = callout;
1729      first_callout = 1;      first_callout = 1;
1730      callout_extra = 0;      callout_extra = 0;
# Line 1292  while (!done) Line 1733  while (!done)
1733      callout_fail_id = -1;      callout_fail_id = -1;
1734      show_malloc = 0;      show_malloc = 0;
1735    
1736      if (infile == stdin) printf("data> ");      if (extra != NULL) extra->flags &=
1737      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1738    
1739        len = 0;
1740        for (;;)
1741        {        {
1742        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1743        goto CONTINUE;          {
1744            if (len > 0) break;
1745            done = 1;
1746            goto CONTINUE;
1747            }
1748          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1749          len = (int)strlen((char *)buffer);
1750          if (buffer[len-1] == '\n') break;
1751        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1752    
     len = (int)strlen((char *)buffer);  
1753      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1754      buffer[len] = 0;      buffer[len] = 0;
1755      if (len == 0) break;      if (len == 0) break;
# Line 1308  while (!done) Line 1757  while (!done)
1757      p = buffer;      p = buffer;
1758      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1759    
1760      q = dbuffer;      bptr = q = dbuffer;
1761      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1762        {        {
1763        int i = 0;        int i = 0;
# Line 1330  while (!done) Line 1779  while (!done)
1779          c -= '0';          c -= '0';
1780          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1781            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1782    
1783    #if !defined NOUTF8
1784            if (use_utf8 && c > 255)
1785              {
1786              unsigned char buff8[8];
1787              int ii, utn;
1788              utn = ord2utf8(c, buff8);
1789              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1790              c = buff8[ii];   /* Last byte */
1791              }
1792    #endif
1793          break;          break;
1794    
1795          case 'x':          case 'x':
# Line 1347  while (!done) Line 1807  while (!done)
1807              {              {
1808              unsigned char buff8[8];              unsigned char buff8[8];
1809              int ii, utn;              int ii, utn;
1810              utn = ord2utf8(c, buff8);              if (use_utf8)
1811              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                {
1812              c = buff8[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
1813                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1814                  c = buff8[ii];   /* Last byte */
1815                  }
1816                else
1817                 {
1818                 if (c > 255)
1819                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1820                     "UTF-8 mode is not enabled.\n"
1821                     "** Truncation will probably give the wrong result.\n", c);
1822                 }
1823              p = pt + 1;              p = pt + 1;
1824              break;              break;
1825              }              }
# Line 1391  while (!done) Line 1861  while (!done)
1861            }            }
1862          else if (isalnum(*p))          else if (isalnum(*p))
1863            {            {
1864            uschar name[256];            uschar *npp = copynamesptr;
           uschar *npp = name;  
1865            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1866              *npp++ = 0;
1867            *npp = 0;            *npp = 0;
1868            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
1869            if (n < 0)            if (n < 0)
1870              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1871            else copystrings |= 1 << n;            copynamesptr = npp;
1872            }            }
1873          else if (*p == '+')          else if (*p == '+')
1874            {            {
# Line 1459  while (!done) Line 1929  while (!done)
1929            }            }
1930          else if (isalnum(*p))          else if (isalnum(*p))
1931            {            {
1932            uschar name[256];            uschar *npp = getnamesptr;
           uschar *npp = name;  
1933            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1934              *npp++ = 0;
1935            *npp = 0;            *npp = 0;
1936            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
1937            if (n < 0)            if (n < 0)
1938              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1939            else getstrings |= 1 << n;            getnamesptr = npp;
1940            }            }
1941          continue;          continue;
1942    
# Line 1492  while (!done) Line 1962  while (!done)
1962            if (offsets == NULL)            if (offsets == NULL)
1963              {              {
1964              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1965                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1966              yield = 1;              yield = 1;
1967              goto EXIT;              goto EXIT;
1968              }              }
# Line 1505  while (!done) Line 1975  while (!done)
1975          options |= PCRE_PARTIAL;          options |= PCRE_PARTIAL;
1976          continue;          continue;
1977    
1978            case 'Q':
1979            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1980            if (extra == NULL)
1981              {
1982              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1983              extra->flags = 0;
1984              }
1985            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1986            extra->match_limit_recursion = n;
1987            continue;
1988    
1989            case 'q':
1990            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1991            if (extra == NULL)
1992              {
1993              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1994              extra->flags = 0;
1995              }
1996            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1997            extra->match_limit = n;
1998            continue;
1999    
2000  #if !defined NODFA  #if !defined NODFA
2001          case 'R':          case 'R':
2002          options |= PCRE_DFA_RESTART;          options |= PCRE_DFA_RESTART;
# Line 1522  while (!done) Line 2014  while (!done)
2014          case '?':          case '?':
2015          options |= PCRE_NO_UTF8_CHECK;          options |= PCRE_NO_UTF8_CHECK;
2016          continue;          continue;
2017    
2018            case '<':
2019              {
2020              int x = check_newline(p, outfile);
2021              if (x == 0) goto NEXT_DATA;
2022              options |= x;
2023              while (*p++ != '>');
2024              }
2025            continue;
2026          }          }
2027        *q++ = c;        *q++ = c;
2028        }        }
2029      *q = 0;      *q = 0;
2030      len = q - dbuffer;      len = q - dbuffer;
2031    
2032        /* Move the data to the end of the buffer so that a read over the end of
2033        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2034        we are using the POSIX interface, we must include the terminating zero. */
2035    
2036    #if !defined NOPOSIX
2037        if (posix || do_posix)
2038          {
2039          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2040          bptr += buffer_size - len - 1;
2041          }
2042        else
2043    #endif
2044          {
2045          memmove(bptr + buffer_size - len, bptr, len);
2046          bptr += buffer_size - len;
2047          }
2048    
2049      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
2050        {        {
2051        printf("**Match limit not relevant for DFA matching: ignored\n");        printf("**Match limit not relevant for DFA matching: ignored\n");
# Line 1552  while (!done) Line 2070  while (!done)
2070    
2071        if (rc != 0)        if (rc != 0)
2072          {          {
2073          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2074          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2075          }          }
2076          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2077                  != 0)
2078            {
2079            fprintf(outfile, "Matched with REG_NOSUB\n");
2080            }
2081        else        else
2082          {          {
2083          size_t i;          size_t i;
# Line 1586  while (!done) Line 2109  while (!done)
2109    
2110      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2111        {        {
2112        if (timeit)        if (timeitm > 0)
2113          {          {
2114          register int i;          register int i;
2115          clock_t time_taken;          clock_t time_taken;
# Line 1596  while (!done) Line 2119  while (!done)
2119          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
2120            {            {
2121            int workspace[1000];            int workspace[1000];
2122            for (i = 0; i < LOOPREPEAT; i++)            for (i = 0; i < timeitm; i++)
2123              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2124                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
2125                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
# Line 1604  while (!done) Line 2127  while (!done)
2127          else          else
2128  #endif  #endif
2129    
2130          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeitm; i++)
2131            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2132              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2133    
2134          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2135          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2136            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
2137              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2138          }          }
2139    
2140        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
2141        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
2142          for the recursion limit. */
2143    
2144        if (find_match_limit)        if (find_match_limit)
2145          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
2146          if (extra == NULL)          if (extra == NULL)
2147            {            {
2148            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2149            extra->flags = 0;            extra->flags = 0;
2150            }            }
         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;  
2151    
2152          for (;;)          (void)check_match_limit(re, extra, bptr, len, start_offset,
2153            {            options|g_notempty, use_offsets, use_size_offsets,
2154            extra->match_limit = mid;            PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2155            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,            PCRE_ERROR_MATCHLIMIT, "match()");
2156              options | g_notempty, use_offsets, use_size_offsets);  
2157            if (count == PCRE_ERROR_MATCHLIMIT)          count = check_match_limit(re, extra, bptr, len, start_offset,
2158              {            options|g_notempty, use_offsets, use_size_offsets,
2159              /* fprintf(outfile, "Testing match limit = %d\n", mid); */            PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2160              min = mid;            PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||  
                                  count == PCRE_ERROR_PARTIAL)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
   
         extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;  
2161          }          }
2162    
2163        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1708  while (!done) Line 2209  while (!done)
2209    
2210        if (count >= 0)        if (count >= 0)
2211          {          {
2212          int i;          int i, maxcount;
2213    
2214    #if !defined NODFA
2215            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2216    #endif
2217              maxcount = use_size_offsets/3;
2218    
2219            /* This is a check against a lunatic return value. */
2220    
2221            if (count > maxcount)
2222              {
2223              fprintf(outfile,
2224                "** PCRE error: returned count %d is too big for offset size %d\n",
2225                count, use_size_offsets);
2226              count = use_size_offsets/3;
2227              if (do_g || do_G)
2228                {
2229                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2230                do_g = do_G = FALSE;        /* Break g/G loop */
2231                }
2232              }
2233    
2234          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2235            {            {
2236            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 1736  while (!done) Line 2258  while (!done)
2258            {            {
2259            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2260              {              {
2261              char copybuffer[16];              char copybuffer[256];
2262              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2263                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2264              if (rc < 0)              if (rc < 0)
# Line 1746  while (!done) Line 2268  while (!done)
2268              }              }
2269            }            }
2270    
2271            for (copynamesptr = copynames;
2272                 *copynamesptr != 0;
2273                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2274              {
2275              char copybuffer[256];
2276              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2277                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2278              if (rc < 0)
2279                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2280              else
2281                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2282              }
2283    
2284          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2285            {            {
2286            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1758  while (!done) Line 2293  while (!done)
2293              else              else
2294                {                {
2295                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2296                pcre_free_substring(substring);                pcre_free_substring(substring);
2297                }                }
2298              }              }
2299            }            }
2300    
2301            for (getnamesptr = getnames;
2302                 *getnamesptr != 0;
2303                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2304              {
2305              const char *substring;
2306              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2307                count, (char *)getnamesptr, &substring);
2308              if (rc < 0)
2309                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2310              else
2311                {
2312                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2313                pcre_free_substring(substring);
2314                }
2315              }
2316    
2317          if (getlist)          if (getlist)
2318            {            {
2319            const char **stringlist;            const char **stringlist;
# Line 1798  while (!done) Line 2348  while (!done)
2348          }          }
2349    
2350        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2351        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2352        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2353        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2354        offset values to achieve this. We won't be at the end of the string -  
2355        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any" or
2356          "anycrlf". If the previous match was at the end of a line terminated by
2357          CRLF, an advance of one character just passes the \r, whereas we should
2358          prefer the longer newline sequence, as does the code in pcre_exec().
2359          Fudge the offset value to achieve this.
2360    
2361          Otherwise, in the case of UTF-8 matching, the advance must be one
2362          character, not one byte. */
2363    
2364        else        else
2365          {          {
2366          if (g_notempty != 0)          if (g_notempty != 0)
2367            {            {
2368            int onechar = 1;            int onechar = 1;
2369              unsigned int obits = ((real_pcre *)re)->options;
2370            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2371            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2372                {
2373                int d;
2374                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2375                obits = (d == '\r')? PCRE_NEWLINE_CR :
2376                        (d == '\n')? PCRE_NEWLINE_LF :
2377                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2378                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2379                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2380                }
2381              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2382                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2383                  &&
2384                  start_offset < len - 1 &&
2385                  bptr[start_offset] == '\r' &&
2386                  bptr[start_offset+1] == '\n')
2387                onechar++;
2388              else if (use_utf8)
2389              {              {
2390              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2391                {                {
# Line 1845  while (!done) Line 2420  while (!done)
2420        character. */        character. */
2421    
2422        g_notempty = 0;        g_notempty = 0;
2423    
2424        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2425          {          {
2426          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
# Line 1863  while (!done) Line 2439  while (!done)
2439          len -= use_offsets[1];          len -= use_offsets[1];
2440          }          }
2441        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2442    
2443        NEXT_DATA: continue;
2444      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2445    
2446    CONTINUE:    CONTINUE:
# Line 1877  while (!done) Line 2455  while (!done)
2455      {      {
2456      new_free((void *)tables);      new_free((void *)tables);
2457      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2458        locale_set = 0;
2459      }      }
2460    }    }
2461    

Legend:
Removed from v.85  
changed lines
  Added in v.376

  ViewVC Help
Powered by ViewVC 1.1.5