/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 85 by nigel, Sat Feb 24 21:41:13 2007 UTC revision 336 by ph10, Sat Apr 12 15:59:03 2008 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 48  POSSIBILITY OF SUCH DAMAGE.
48  #include <locale.h>  #include <locale.h>
49  #include <errno.h>  #include <errno.h>
50    
51  #define PCRE_SPY        /* For Win32 build, import data, not export */  #ifdef SUPPORT_LIBREADLINE
52    #include <unistd.h>
53    #include <readline/readline.h>
54    #include <readline/history.h>
55    #endif
56    
57    
58    /* A number of things vary for Windows builds. Originally, pcretest opened its
59    input and output without "b"; then I was told that "b" was needed in some
60    environments, so it was added for release 5.0 to both the input and output. (It
61    makes no difference on Unix-like systems.) Later I was told that it is wrong
62    for the input on Windows. I've now abstracted the modes into two macros that
63    are set here, to make it easier to fiddle with them, and removed "b" from the
64    input mode under Windows. */
65    
66    #if defined(_WIN32) || defined(WIN32)
67    #include <io.h>                /* For _setmode() */
68    #include <fcntl.h>             /* For _O_BINARY */
69    #define INPUT_MODE   "r"
70    #define OUTPUT_MODE  "wb"
71    
72    #else
73    #include <sys/time.h>          /* These two includes are needed */
74    #include <sys/resource.h>      /* for setrlimit(). */
75    #define INPUT_MODE   "rb"
76    #define OUTPUT_MODE  "wb"
77    #endif
78    
79    
80    /* We have to include pcre_internal.h because we need the internal info for
81    displaying the results of pcre_study() and we also need to know about the
82    internal macros, structures, and other internal data values; pcretest has
83    "inside information" compared to a program that strictly follows the PCRE API.
84    
85  /* We include pcre_internal.h because we need the internal info for displaying  Although pcre_internal.h does itself include pcre.h, we explicitly include it
86  the results of pcre_study() and we also need to know about the internal  here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
87  macros, structures, and other internal data values; pcretest has "inside  appropriately for an application, not for building PCRE. */
 information" compared to a program that strictly follows the PCRE API. */  
88    
89    #include "pcre.h"
90  #include "pcre_internal.h"  #include "pcre_internal.h"
91    
92  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to the data tables that PCRE uses. So as not to have to keep
# Line 64  symbols to prevent clashes. */ Line 100  symbols to prevent clashes. */
100  #define _pcre_utf8_table4      utf8_table4  #define _pcre_utf8_table4      utf8_table4
101  #define _pcre_utt              utt  #define _pcre_utt              utt
102  #define _pcre_utt_size         utt_size  #define _pcre_utt_size         utt_size
103    #define _pcre_utt_names        utt_names
104  #define _pcre_OP_lengths       OP_lengths  #define _pcre_OP_lengths       OP_lengths
105    
106  #include "pcre_tables.c"  #include "pcre_tables.c"
107    
108  /* We also need the pcre_printint() function for printing out compiled  /* We also need the pcre_printint() function for printing out compiled
109  patterns. This function is in a separate file so that it can be included in  patterns. This function is in a separate file so that it can be included in
110  pcre_compile.c when that module is compiled with debugging enabled. */  pcre_compile.c when that module is compiled with debugging enabled.
111    
112    The definition of the macro PRINTABLE, which determines whether to print an
113    output character as-is or as a hex value when showing compiled patterns, is
114    contained in this file. We use it here also, in cases when the locale has not
115    been explicitly changed, so as to get consistent output from systems that
116    differ in their output from isprint() even in the "C" locale. */
117    
118  #include "pcre_printint.src"  #include "pcre_printint.src"
119    
120    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
121    
122    
123  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
124  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 83  Makefile. */ Line 128  Makefile. */
128  #include "pcreposix.h"  #include "pcreposix.h"
129  #endif  #endif
130    
131  /* It is also possible, for the benefit of the version imported into Exim, to  /* It is also possible, for the benefit of the version currently imported into
132  build pcretest without support for UTF8 (define NOUTF8), without the interface  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
133  to the DFA matcher (NODFA), and without the doublecheck of the old "info"  interface to the DFA matcher (NODFA), and without the doublecheck of the old
134  function (define NOINFOCHECK). */  "info" function (define NOINFOCHECK). In fact, we automatically cut out the
135    UTF8 support if PCRE is built without it. */
136    
137    #ifndef SUPPORT_UTF8
138    #ifndef NOUTF8
139    #define NOUTF8
140    #endif
141    #endif
142    
143    
144  /* Other parameters */  /* Other parameters */
# Line 99  function (define NOINFOCHECK). */ Line 151  function (define NOINFOCHECK). */
151  #endif  #endif
152  #endif  #endif
153    
154  #define LOOPREPEAT 500000  /* This is the default loop count for timing. */
   
 #define BUFFER_SIZE 30000  
 #define PBUFFER_SIZE BUFFER_SIZE  
 #define DBUFFER_SIZE BUFFER_SIZE  
155    
156    #define LOOPREPEAT 500000
157    
158  /* Static variables */  /* Static variables */
159    
# Line 114  static int callout_count; Line 163  static int callout_count;
163  static int callout_extra;  static int callout_extra;
164  static int callout_fail_count;  static int callout_fail_count;
165  static int callout_fail_id;  static int callout_fail_id;
166    static int debug_lengths;
167  static int first_callout;  static int first_callout;
168    static int locale_set = 0;
169  static int show_malloc;  static int show_malloc;
170  static int use_utf8;  static int use_utf8;
171  static size_t gotten_store;  static size_t gotten_store;
172    
173    /* The buffers grow automatically if very long input lines are encountered. */
174    
175    static int buffer_size = 50000;
176    static uschar *buffer = NULL;
177    static uschar *dbuffer = NULL;
178  static uschar *pbuffer = NULL;  static uschar *pbuffer = NULL;
179    
180    
181    
182  /*************************************************  /*************************************************
183    *        Read or extend an input line            *
184    *************************************************/
185    
186    /* Input lines are read into buffer, but both patterns and data lines can be
187    continued over multiple input lines. In addition, if the buffer fills up, we
188    want to automatically expand it so as to be able to handle extremely large
189    lines that are needed for certain stress tests. When the input buffer is
190    expanded, the other two buffers must also be expanded likewise, and the
191    contents of pbuffer, which are a copy of the input for callouts, must be
192    preserved (for when expansion happens for a data line). This is not the most
193    optimal way of handling this, but hey, this is just a test program!
194    
195    Arguments:
196      f            the file to read
197      start        where in buffer to start (this *must* be within buffer)
198      prompt       for stdin or readline()
199    
200    Returns:       pointer to the start of new data
201                   could be a copy of start, or could be moved
202                   NULL if no data read and EOF reached
203    */
204    
205    static uschar *
206    extend_inputline(FILE *f, uschar *start, const char *prompt)
207    {
208    uschar *here = start;
209    
210    for (;;)
211      {
212      int rlen = buffer_size - (here - buffer);
213    
214      if (rlen > 1000)
215        {
216        int dlen;
217    
218        /* If libreadline support is required, use readline() to read a line if the
219        input is a terminal. Note that readline() removes the trailing newline, so
220        we must put it back again, to be compatible with fgets(). */
221    
222    #ifdef SUPPORT_LIBREADLINE
223        if (isatty(fileno(f)))
224          {
225          size_t len;
226          char *s = readline(prompt);
227          if (s == NULL) return (here == start)? NULL : start;
228          len = strlen(s);
229          if (len > 0) add_history(s);
230          if (len > rlen - 1) len = rlen - 1;
231          memcpy(here, s, len);
232          here[len] = '\n';
233          here[len+1] = 0;
234          free(s);
235          }
236        else
237    #endif
238    
239        /* Read the next line by normal means, prompting if the file is stdin. */
240    
241          {
242          if (f == stdin) printf(prompt);
243          if (fgets((char *)here, rlen,  f) == NULL)
244            return (here == start)? NULL : start;
245          }
246    
247        dlen = (int)strlen((char *)here);
248        if (dlen > 0 && here[dlen - 1] == '\n') return start;
249        here += dlen;
250        }
251    
252      else
253        {
254        int new_buffer_size = 2*buffer_size;
255        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
256        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
257        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
258    
259        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
260          {
261          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
262          exit(1);
263          }
264    
265        memcpy(new_buffer, buffer, buffer_size);
266        memcpy(new_pbuffer, pbuffer, buffer_size);
267    
268        buffer_size = new_buffer_size;
269    
270        start = new_buffer + (start - buffer);
271        here = new_buffer + (here - buffer);
272    
273        free(buffer);
274        free(dbuffer);
275        free(pbuffer);
276    
277        buffer = new_buffer;
278        dbuffer = new_dbuffer;
279        pbuffer = new_pbuffer;
280        }
281      }
282    
283    return NULL;  /* Control never gets here */
284    }
285    
286    
287    
288    
289    
290    
291    
292    /*************************************************
293  *          Read number from string               *  *          Read number from string               *
294  *************************************************/  *************************************************/
295    
296  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
297  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
298  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
299    
300  Arguments:  Arguments:
301    str           string to be converted    str           string to be converted
# Line 159  return(result); Line 325  return(result);
325  and returns the value of the character.  and returns the value of the character.
326    
327  Argument:  Argument:
328    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
329    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
330    
331  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
332             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
333  */  */
334    
335  #if !defined NOUTF8  #if !defined NOUTF8
336    
337  static int  static int
338  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *utf8bytes, int *vptr)
339  {  {
340  int c = *buffer++;  int c = *utf8bytes++;
341  int d = c;  int d = c;
342  int i, j, s;  int i, j, s;
343    
# Line 191  d = (c & utf8_table3[i]) << s; Line 357  d = (c & utf8_table3[i]) << s;
357    
358  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
359    {    {
360    c = *buffer++;    c = *utf8bytes++;
361    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
362    s -= 6;    s -= 6;
363    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 222  and encodes it as a UTF-8 character in 0 Line 388  and encodes it as a UTF-8 character in 0
388    
389  Arguments:  Arguments:
390    cvalue     the character value    cvalue     the character value
391    buffer     pointer to buffer for result - at least 6 bytes long    utf8bytes  pointer to buffer for result - at least 6 bytes long
392    
393  Returns:     number of characters placed in the buffer  Returns:     number of characters placed in the buffer
394  */  */
395    
396    #if !defined NOUTF8
397    
398  static int  static int
399  ord2utf8(int cvalue, uschar *buffer)  ord2utf8(int cvalue, uschar *utf8bytes)
400  {  {
401  register int i, j;  register int i, j;
402  for (i = 0; i < utf8_table1_size; i++)  for (i = 0; i < utf8_table1_size; i++)
403    if (cvalue <= utf8_table1[i]) break;    if (cvalue <= utf8_table1[i]) break;
404  buffer += i;  utf8bytes += i;
405  for (j = i; j > 0; j--)  for (j = i; j > 0; j--)
406   {   {
407   *buffer-- = 0x80 | (cvalue & 0x3f);   *utf8bytes-- = 0x80 | (cvalue & 0x3f);
408   cvalue >>= 6;   cvalue >>= 6;
409   }   }
410  *buffer = utf8_table2[i] | cvalue;  *utf8bytes = utf8_table2[i] | cvalue;
411  return i + 1;  return i + 1;
412  }  }
413    
414    #endif
415    
416    
417    
418  /*************************************************  /*************************************************
# Line 269  while (length-- > 0) Line 439  while (length-- > 0)
439        {        {
440        length -= rc - 1;        length -= rc - 1;
441        p += rc;        p += rc;
442        if (c < 256 && isprint(c))        if (PRINTHEX(c))
443          {          {
444          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
445          yield++;          yield++;
446          }          }
447        else        else
448          {          {
449          int n;          int n = 4;
450          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
451          yield += n;          yield += (n <= 0x000000ff)? 2 :
452                     (n <= 0x00000fff)? 3 :
453                     (n <= 0x0000ffff)? 4 :
454                     (n <= 0x000fffff)? 5 : 6;
455          }          }
456        continue;        continue;
457        }        }
# Line 287  while (length-- > 0) Line 460  while (length-- > 0)
460    
461     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
462    
463    if (isprint(c = *(p++)))    c = *p++;
464      if (PRINTHEX(c))
465      {      {
466      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
467      yield++;      yield++;
# Line 461  if ((rc = pcre_fullinfo(re, study, optio Line 635  if ((rc = pcre_fullinfo(re, study, optio
635  *         Byte flipping function                 *  *         Byte flipping function                 *
636  *************************************************/  *************************************************/
637    
638  static long int  static unsigned long int
639  byteflip(long int value, int n)  byteflip(unsigned long int value, int n)
640  {  {
641  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
642  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
# Line 475  return ((value & 0x000000ff) << 24) | Line 649  return ((value & 0x000000ff) << 24) |
649    
650    
651  /*************************************************  /*************************************************
652    *        Check match or recursion limit          *
653    *************************************************/
654    
655    static int
656    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
657      int start_offset, int options, int *use_offsets, int use_size_offsets,
658      int flag, unsigned long int *limit, int errnumber, const char *msg)
659    {
660    int count;
661    int min = 0;
662    int mid = 64;
663    int max = -1;
664    
665    extra->flags |= flag;
666    
667    for (;;)
668      {
669      *limit = mid;
670    
671      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
672        use_offsets, use_size_offsets);
673    
674      if (count == errnumber)
675        {
676        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
677        min = mid;
678        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
679        }
680    
681      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
682                             count == PCRE_ERROR_PARTIAL)
683        {
684        if (mid == min + 1)
685          {
686          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
687          break;
688          }
689        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
690        max = mid;
691        mid = (min + mid)/2;
692        }
693      else break;    /* Some other error */
694      }
695    
696    extra->flags &= ~flag;
697    return count;
698    }
699    
700    
701    
702    /*************************************************
703    *         Case-independent strncmp() function    *
704    *************************************************/
705    
706    /*
707    Arguments:
708      s         first string
709      t         second string
710      n         number of characters to compare
711    
712    Returns:    < 0, = 0, or > 0, according to the comparison
713    */
714    
715    static int
716    strncmpic(uschar *s, uschar *t, int n)
717    {
718    while (n--)
719      {
720      int c = tolower(*s++) - tolower(*t++);
721      if (c) return c;
722      }
723    return 0;
724    }
725    
726    
727    
728    /*************************************************
729    *         Check newline indicator                *
730    *************************************************/
731    
732    /* This is used both at compile and run-time to check for <xxx> escapes, where
733    xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
734    no match.
735    
736    Arguments:
737      p           points after the leading '<'
738      f           file for error message
739    
740    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
741    */
742    
743    static int
744    check_newline(uschar *p, FILE *f)
745    {
746    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
747    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
748    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
749    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
750    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
751    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
752    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
753    fprintf(f, "Unknown newline type at: <%s\n", p);
754    return 0;
755    }
756    
757    
758    
759    /*************************************************
760    *             Usage function                     *
761    *************************************************/
762    
/* Write the command-line usage summary to stdout. The line about readline()
and the entries for -dfa and -p depend on the SUPPORT_LIBREADLINE, NODFA, and
NOPOSIX compile-time settings. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);

#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif

fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif

fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
795    
796    
797    
798    /*************************************************
799  *                Main Program                    *  *                Main Program                    *
800  *************************************************/  *************************************************/
801    
# Line 489  int options = 0; Line 810  int options = 0;
810  int study_options = 0;  int study_options = 0;
811  int op = 1;  int op = 1;
812  int timeit = 0;  int timeit = 0;
813    int timeitm = 0;
814  int showinfo = 0;  int showinfo = 0;
815  int showstore = 0;  int showstore = 0;
816    int quiet = 0;
817  int size_offsets = 45;  int size_offsets = 45;
818  int size_offsets_max;  int size_offsets_max;
819  int *offsets = NULL;  int *offsets = NULL;
# Line 501  int debug = 0; Line 824  int debug = 0;
824  int done = 0;  int done = 0;
825  int all_use_dfa = 0;  int all_use_dfa = 0;
826  int yield = 0;  int yield = 0;
827    int stack_size;
828    
829  unsigned char *buffer;  /* These vectors store, end-to-end, a list of captured substring names. Assume
830  unsigned char *dbuffer;  that 1024 is plenty long enough for the few names we'll be testing. */
831    
832    uschar copynames[1024];
833    uschar getnames[1024];
834    
835    uschar *copynamesptr;
836    uschar *getnamesptr;
837    
838  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that Electric Fence will check their misuse
839  when I am debugging. */  when I am debugging. They grow automatically when very long lines are read. */
840    
841  buffer = (unsigned char *)malloc(BUFFER_SIZE);  buffer = (unsigned char *)malloc(buffer_size);
842  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);  dbuffer = (unsigned char *)malloc(buffer_size);
843  pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);  pbuffer = (unsigned char *)malloc(buffer_size);
   
 /* The outfile variable is static so that new_malloc can use it. The _setmode()  
 stuff is some magic that I don't understand, but which apparently does good  
 things in Windows. It's related to line terminations.  */  
844    
845  #if defined(_WIN32) || defined(WIN32)  /* The outfile variable is static so that new_malloc can use it. */
 _setmode( _fileno( stdout ), 0x8000 );  
 #endif  /* defined(_WIN32) || defined(WIN32) */  
846    
847  outfile = stdout;  outfile = stdout;
848    
849    /* The following _setmode() call is Windows-specific: it puts stdout into
850    binary (untranslated) mode, so that the runtime library does not expand each
851    LF written into CRLF. Never having used Windows, I take this on trust.
852    Originally it set 0x8000, but then I was advised that _O_BINARY was better. */
853    
854    #if defined(_WIN32) || defined(WIN32)
855    _setmode( _fileno( stdout ), _O_BINARY );
856    #endif
857    
858  /* Scan options */  /* Scan options */
859    
860  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
# Line 530  while (argc > 1 && argv[op][0] == '-') Line 863  while (argc > 1 && argv[op][0] == '-')
863    
864    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
865      showstore = 1;      showstore = 1;
866    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
867      else if (strcmp(argv[op], "-b") == 0) debug = 1;
868    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
869    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
870  #if !defined NODFA  #if !defined NODFA
# Line 543  while (argc > 1 && argv[op][0] == '-') Line 877  while (argc > 1 && argv[op][0] == '-')
877      op++;      op++;
878      argc--;      argc--;
879      }      }
880      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
881        {
882        int both = argv[op][2] == 0;
883        int temp;
884        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
885                         *endptr == 0))
886          {
887          timeitm = temp;
888          op++;
889          argc--;
890          }
891        else timeitm = LOOPREPEAT;
892        if (both) timeit = timeitm;
893        }
894      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
895          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
896            *endptr == 0))
897        {
898    #if defined(_WIN32) || defined(WIN32)
899        printf("PCRE: -S not supported on this OS\n");
900        exit(1);
901    #else
902        int rc;
903        struct rlimit rlim;
904        getrlimit(RLIMIT_STACK, &rlim);
905        rlim.rlim_cur = stack_size * 1024 * 1024;
906        rc = setrlimit(RLIMIT_STACK, &rlim);
907        if (rc != 0)
908          {
909        printf("PCRE: setrlimit() failed with error %d\n", rc);
910        exit(1);
911          }
912        op++;
913        argc--;
914    #endif
915        }
916  #if !defined NOPOSIX  #if !defined NOPOSIX
917    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
918  #endif  #endif
# Line 556  while (argc > 1 && argv[op][0] == '-') Line 926  while (argc > 1 && argv[op][0] == '-')
926      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
927      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
928      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
929      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
930          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
931          (rc == -2)? "ANYCRLF" :
932          (rc == -1)? "ANY" : "???");
933        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
934        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
935                                         "all Unicode newlines");
936      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
937      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
938      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
939      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
940      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
941      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %d\n", rc);
942        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
943        printf("  Default recursion depth limit = %d\n", rc);
944      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
945      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
946      exit(0);      goto EXIT;
947        }
948      else if (strcmp(argv[op], "-help") == 0 ||
949               strcmp(argv[op], "--help") == 0)
950        {
951        usage();
952        goto EXIT;
953      }      }
954    else    else
955      {      {
956      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
957      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
     printf("  -C     show PCRE compile-time options and exit\n");  
     printf("  -d     debug: show compiled code; implies -i\n");  
 #if !defined NODFA  
     printf("  -dfa   force DFA matching for all subjects\n");  
 #endif  
     printf("  -i     show information about compiled pattern\n"  
            "  -m     output memory used information\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store (memory) used information\n"  
            "  -t     time compilation and execution\n");  
958      yield = 1;      yield = 1;
959      goto EXIT;      goto EXIT;
960      }      }
# Line 598  offsets = (int *)malloc(size_offsets_max Line 969  offsets = (int *)malloc(size_offsets_max
969  if (offsets == NULL)  if (offsets == NULL)
970    {    {
971    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
972      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
973    yield = 1;    yield = 1;
974    goto EXIT;    goto EXIT;
975    }    }
# Line 607  if (offsets == NULL) Line 978  if (offsets == NULL)
978    
979  if (argc > 1)  if (argc > 1)
980    {    {
981    infile = fopen(argv[op], "rb");    infile = fopen(argv[op], INPUT_MODE);
982    if (infile == NULL)    if (infile == NULL)
983      {      {
984      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
# Line 618  if (argc > 1) Line 989  if (argc > 1)
989    
990  if (argc > 2)  if (argc > 2)
991    {    {
992    outfile = fopen(argv[op+1], "wb");    outfile = fopen(argv[op+1], OUTPUT_MODE);
993    if (outfile == NULL)    if (outfile == NULL)
994      {      {
995      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
# Line 634  pcre_free = new_free; Line 1005  pcre_free = new_free;
1005  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
1006  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
1007    
1008  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1009    
1010  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1011    
1012  /* Main loop */  /* Main loop */
1013    
# Line 663  while (!done) Line 1034  while (!done)
1034    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1035    int do_showrest = 0;    int do_showrest = 0;
1036    int do_flip = 0;    int do_flip = 0;
1037    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
1038    
1039    use_utf8 = 0;    use_utf8 = 0;
1040      debug_lengths = 1;
1041    
1042    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;  
1043    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1044    fflush(outfile);    fflush(outfile);
1045    
# Line 680  while (!done) Line 1051  while (!done)
1051    
1052    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1053      {      {
1054      unsigned long int magic;      unsigned long int magic, get_options;
1055      uschar sbuf[8];      uschar sbuf[8];
1056      FILE *f;      FILE *f;
1057    
# Line 728  while (!done) Line 1099  while (!done)
1099    
1100      /* Need to know if UTF-8 for printing data strings */      /* Need to know if UTF-8 for printing data strings */
1101    
1102      new_info(re, NULL, PCRE_INFO_OPTIONS, &options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1103      use_utf8 = (options & PCRE_UTF8) != 0;      use_utf8 = (get_options & PCRE_UTF8) != 0;
1104    
1105      /* Now see if there is any following study data */      /* Now see if there is any following study data */
1106    
# Line 768  while (!done) Line 1139  while (!done)
1139    
1140    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1141      {      {
1142      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1143      goto SKIP_DATA;      goto SKIP_DATA;
1144      }      }
1145    
1146    pp = p;    pp = p;
1147      poffset = p - buffer;
1148    
1149    for(;;)    for(;;)
1150      {      {
# Line 783  while (!done) Line 1155  while (!done)
1155        pp++;        pp++;
1156        }        }
1157      if (*pp != 0) break;      if (*pp != 0) break;
1158        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = BUFFER_SIZE - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
1159        {        {
1160        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1161        done = 1;        done = 1;
# Line 801  while (!done) Line 1164  while (!done)
1164      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1165      }      }
1166    
1167      /* The buffer may have moved while being extended; reset the start of data
1168      pointer to the correct relative point in the buffer. */
1169    
1170      p = buffer + poffset;
1171    
1172    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1173    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1174    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
# Line 832  while (!done) Line 1200  while (!done)
1200    
1201        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1202        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1203          case 'B': do_debug = 1; break;
1204        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
1205        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1206        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1207        case 'F': do_flip = 1; break;        case 'F': do_flip = 1; break;
1208        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1209        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1210          case 'J': options |= PCRE_DUPNAMES; break;
1211        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1212        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1213    
# Line 848  while (!done) Line 1218  while (!done)
1218        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1219        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1220        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1221          case 'Z': debug_lengths = 0; break;
1222        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1223        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1224    
1225        case 'L':        case 'L':
1226        ppp = pp;        ppp = pp;
1227        /* The '\r' test here is so that it works on Windows */        /* The '\r' test here is so that it works on Windows. */
1228        while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;        /* The '0' test is just in case this is an unterminated line. */
1229          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1230        *ppp = 0;        *ppp = 0;
1231        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1232          {          {
1233          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1234          goto SKIP_DATA;          goto SKIP_DATA;
1235          }          }
1236          locale_set = 1;
1237        tables = pcre_maketables();        tables = pcre_maketables();
1238        pp = ppp;        pp = ppp;
1239        break;        break;
# Line 872  while (!done) Line 1245  while (!done)
1245        *pp = 0;        *pp = 0;
1246        break;        break;
1247    
1248          case '<':
1249            {
1250            if (strncmp((char *)pp, "JS>", 3) == 0)
1251              {
1252              options |= PCRE_JAVASCRIPT_COMPAT;
1253              pp += 3;
1254              }
1255            else
1256              {
1257              int x = check_newline(pp, outfile);
1258              if (x == 0) goto SKIP_DATA;
1259              options |= x;
1260              while (*pp++ != '>');
1261              }
1262            }
1263          break;
1264    
1265        case '\r':                      /* So that it works in Windows */        case '\r':                      /* So that it works in Windows */
1266        case '\n':        case '\n':
1267        case ' ':        case ' ':
# Line 896  while (!done) Line 1286  while (!done)
1286      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1287      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1288      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1289        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1290        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1291    
1292      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1293    
1294      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 903  while (!done) Line 1296  while (!done)
1296    
1297      if (rc != 0)      if (rc != 0)
1298        {        {
1299        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1300        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1301        goto SKIP_DATA;        goto SKIP_DATA;
1302        }        }
# Line 915  while (!done) Line 1308  while (!done)
1308  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1309    
1310      {      {
1311      if (timeit)      if (timeit > 0)
1312        {        {
1313        register int i;        register int i;
1314        clock_t time_taken;        clock_t time_taken;
1315        clock_t start_time = clock();        clock_t start_time = clock();
1316        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1317          {          {
1318          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1319          if (re != NULL) free(re);          if (re != NULL) free(re);
1320          }          }
1321        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1322        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1323          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1324            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1325        }        }
1326    
# Line 944  while (!done) Line 1337  while (!done)
1337          {          {
1338          for (;;)          for (;;)
1339            {            {
1340            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1341              {              {
1342              done = 1;              done = 1;
1343              goto CONTINUE;              goto CONTINUE;
# Line 979  while (!done) Line 1372  while (!done)
1372    
1373      if (do_study)      if (do_study)
1374        {        {
1375        if (timeit)        if (timeit > 0)
1376          {          {
1377          register int i;          register int i;
1378          clock_t time_taken;          clock_t time_taken;
1379          clock_t start_time = clock();          clock_t start_time = clock();
1380          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
1381            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
1382          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1383          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
1384          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
1385            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeit) /
1386              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
1387          }          }
1388        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 1007  while (!done) Line 1400  while (!done)
1400      if (do_flip)      if (do_flip)
1401        {        {
1402        real_pcre *rre = (real_pcre *)re;        real_pcre *rre = (real_pcre *)re;
1403        rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));        rre->magic_number =
1404            byteflip(rre->magic_number, sizeof(rre->magic_number));
1405        rre->size = byteflip(rre->size, sizeof(rre->size));        rre->size = byteflip(rre->size, sizeof(rre->size));
1406        rre->options = byteflip(rre->options, sizeof(rre->options));        rre->options = byteflip(rre->options, sizeof(rre->options));
1407        rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));        rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1408        rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));        rre->top_bracket =
1409        rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1410        rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));        rre->top_backref =
1411        rre->name_table_offset = byteflip(rre->name_table_offset,          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1412          rre->first_byte =
1413            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1414          rre->req_byte =
1415            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1416          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1417          sizeof(rre->name_table_offset));          sizeof(rre->name_table_offset));
1418        rre->name_entry_size = byteflip(rre->name_entry_size,        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1419          sizeof(rre->name_entry_size));          sizeof(rre->name_entry_size));
1420        rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));        rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1421            sizeof(rre->name_count));
1422    
1423        if (extra != NULL)        if (extra != NULL)
1424          {          {
# Line 1032  while (!done) Line 1432  while (!done)
1432    
1433      SHOW_INFO:      SHOW_INFO:
1434    
1435        if (do_debug)
1436          {
1437          fprintf(outfile, "------------------------------------------------------------------\n");
1438          pcre_printint(re, outfile, debug_lengths);
1439          }
1440    
1441      if (do_showinfo)      if (do_showinfo)
1442        {        {
1443        unsigned long int get_options, all_options;        unsigned long int get_options, all_options;
1444  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1445        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1446  #endif  #endif
1447        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char, okpartial, jchanged,
1448            hascrorlf;
1449        int nameentrysize, namecount;        int nameentrysize, namecount;
1450        const uschar *nametable;        const uschar *nametable;
1451    
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         pcre_printint(re, outfile);  
         }  
   
1452        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1453        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1454        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
# Line 1057  while (!done) Line 1458  while (!done)
1458        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1459        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1460        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1461          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1462          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1463          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1464    
1465  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1466        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
# Line 1098  while (!done) Line 1502  while (!done)
1502            }            }
1503          }          }
1504    
1505        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1506        to fish it out via out back door */        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1507    
1508        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1509        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
         }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
1510    
1511        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1512          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1513            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1514            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1515            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1516            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1517            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1518            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1519              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1520              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1521            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1522            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1523            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1524              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1525            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1526            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1527              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1528    
1529          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1530    
1531          switch (get_options & PCRE_NEWLINE_BITS)
1532            {
1533            case PCRE_NEWLINE_CR:
1534            fprintf(outfile, "Forced newline sequence: CR\n");
1535            break;
1536    
1537        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)          case PCRE_NEWLINE_LF:
1538          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Forced newline sequence: LF\n");
1539            break;
1540    
1541            case PCRE_NEWLINE_CRLF:
1542            fprintf(outfile, "Forced newline sequence: CRLF\n");
1543            break;
1544    
1545            case PCRE_NEWLINE_ANYCRLF:
1546            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1547            break;
1548    
1549            case PCRE_NEWLINE_ANY:
1550            fprintf(outfile, "Forced newline sequence: ANY\n");
1551            break;
1552    
1553            default:
1554            break;
1555            }
1556    
1557        if (first_char == -1)        if (first_char == -1)
1558          {          {
1559          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1560          }          }
1561        else if (first_char < 0)        else if (first_char < 0)
1562          {          {
# Line 1140  while (!done) Line 1567  while (!done)
1567          int ch = first_char & 255;          int ch = first_char & 255;
1568          const char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1569            "" : " (caseless)";            "" : " (caseless)";
1570          if (isprint(ch))          if (PRINTHEX(ch))
1571            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1572          else          else
1573            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 1155  while (!done) Line 1582  while (!done)
1582          int ch = need_char & 255;          int ch = need_char & 255;
1583          const char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1584            "" : " (caseless)";            "" : " (caseless)";
1585          if (isprint(ch))          if (PRINTHEX(ch))
1586            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1587          else          else
1588            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
# Line 1191  while (!done) Line 1618  while (!done)
1618                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
1619                    c = 2;                    c = 2;
1620                    }                    }
1621                  if (isprint(i) && i != ' ')                  if (PRINTHEX(i) && i != ' ')
1622                    {                    {
1623                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
1624                    c += 2;                    c += 2;
# Line 1223  while (!done) Line 1650  while (!done)
1650        else        else
1651          {          {
1652          uschar sbuf[8];          uschar sbuf[8];
1653          sbuf[0] = (true_size >> 24)  & 255;          sbuf[0] = (uschar)((true_size >> 24) & 255);
1654          sbuf[1] = (true_size >> 16)  & 255;          sbuf[1] = (uschar)((true_size >> 16) & 255);
1655          sbuf[2] = (true_size >>  8)  & 255;          sbuf[2] = (uschar)((true_size >>  8) & 255);
1656          sbuf[3] = (true_size)  & 255;          sbuf[3] = (uschar)((true_size) & 255);
1657    
1658          sbuf[4] = (true_study_size >> 24)  & 255;          sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1659          sbuf[5] = (true_study_size >> 16)  & 255;          sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1660          sbuf[6] = (true_study_size >>  8)  & 255;          sbuf[6] = (uschar)((true_study_size >>  8) & 255);
1661          sbuf[7] = (true_study_size)  & 255;          sbuf[7] = (uschar)((true_study_size) & 255);
1662    
1663          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
1664              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1250  while (!done) Line 1677  while (!done)
1677                  strerror(errno));                  strerror(errno));
1678                }                }
1679              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
1680    
1681              }              }
1682            }            }
1683          fclose(f);          fclose(f);
# Line 1266  while (!done) Line 1694  while (!done)
1694    
1695    for (;;)    for (;;)
1696      {      {
1697      unsigned char *q;      uschar *q;
1698      unsigned char *bptr = dbuffer;      uschar *bptr;
1699      int *use_offsets = offsets;      int *use_offsets = offsets;
1700      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1701      int callout_data = 0;      int callout_data = 0;
# Line 1284  while (!done) Line 1712  while (!done)
1712    
1713      options = 0;      options = 0;
1714    
1715        *copynames = 0;
1716        *getnames = 0;
1717    
1718        copynamesptr = copynames;
1719        getnamesptr = getnames;
1720    
1721      pcre_callout = callout;      pcre_callout = callout;
1722      first_callout = 1;      first_callout = 1;
1723      callout_extra = 0;      callout_extra = 0;
# Line 1292  while (!done) Line 1726  while (!done)
1726      callout_fail_id = -1;      callout_fail_id = -1;
1727      show_malloc = 0;      show_malloc = 0;
1728    
1729      if (infile == stdin) printf("data> ");      if (extra != NULL) extra->flags &=
1730      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1731    
1732        len = 0;
1733        for (;;)
1734        {        {
1735        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1736        goto CONTINUE;          {
1737            if (len > 0) break;
1738            done = 1;
1739            goto CONTINUE;
1740            }
1741          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1742          len = (int)strlen((char *)buffer);
1743          if (buffer[len-1] == '\n') break;
1744        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1745    
     len = (int)strlen((char *)buffer);  
1746      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1747      buffer[len] = 0;      buffer[len] = 0;
1748      if (len == 0) break;      if (len == 0) break;
# Line 1308  while (!done) Line 1750  while (!done)
1750      p = buffer;      p = buffer;
1751      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1752    
1753      q = dbuffer;      bptr = q = dbuffer;
1754      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1755        {        {
1756        int i = 0;        int i = 0;
# Line 1330  while (!done) Line 1772  while (!done)
1772          c -= '0';          c -= '0';
1773          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1774            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1775    
1776    #if !defined NOUTF8
1777            if (use_utf8 && c > 255)
1778              {
1779              unsigned char buff8[8];
1780              int ii, utn;
1781              utn = ord2utf8(c, buff8);
1782              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1783              c = buff8[ii];   /* Last byte */
1784              }
1785    #endif
1786          break;          break;
1787    
1788          case 'x':          case 'x':
# Line 1391  while (!done) Line 1844  while (!done)
1844            }            }
1845          else if (isalnum(*p))          else if (isalnum(*p))
1846            {            {
1847            uschar name[256];            uschar *npp = copynamesptr;
           uschar *npp = name;  
1848            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1849              *npp++ = 0;
1850            *npp = 0;            *npp = 0;
1851            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
1852            if (n < 0)            if (n < 0)
1853              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1854            else copystrings |= 1 << n;            copynamesptr = npp;
1855            }            }
1856          else if (*p == '+')          else if (*p == '+')
1857            {            {
# Line 1459  while (!done) Line 1912  while (!done)
1912            }            }
1913          else if (isalnum(*p))          else if (isalnum(*p))
1914            {            {
1915            uschar name[256];            uschar *npp = getnamesptr;
           uschar *npp = name;  
1916            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1917              *npp++ = 0;
1918            *npp = 0;            *npp = 0;
1919            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
1920            if (n < 0)            if (n < 0)
1921              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1922            else getstrings |= 1 << n;            getnamesptr = npp;
1923            }            }
1924          continue;          continue;
1925    
# Line 1492  while (!done) Line 1945  while (!done)
1945            if (offsets == NULL)            if (offsets == NULL)
1946              {              {
1947              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1948                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1949              yield = 1;              yield = 1;
1950              goto EXIT;              goto EXIT;
1951              }              }
# Line 1505  while (!done) Line 1958  while (!done)
1958          options |= PCRE_PARTIAL;          options |= PCRE_PARTIAL;
1959          continue;          continue;
1960    
1961            case 'Q':
1962            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1963            if (extra == NULL)
1964              {
1965              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1966              extra->flags = 0;
1967              }
1968            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1969            extra->match_limit_recursion = n;
1970            continue;
1971    
1972            case 'q':
1973            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1974            if (extra == NULL)
1975              {
1976              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1977              extra->flags = 0;
1978              }
1979            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1980            extra->match_limit = n;
1981            continue;
1982    
1983  #if !defined NODFA  #if !defined NODFA
1984          case 'R':          case 'R':
1985          options |= PCRE_DFA_RESTART;          options |= PCRE_DFA_RESTART;
# Line 1522  while (!done) Line 1997  while (!done)
1997          case '?':          case '?':
1998          options |= PCRE_NO_UTF8_CHECK;          options |= PCRE_NO_UTF8_CHECK;
1999          continue;          continue;
2000    
2001            case '<':
2002              {
2003              int x = check_newline(p, outfile);
2004              if (x == 0) goto NEXT_DATA;
2005              options |= x;
2006              while (*p++ != '>');
2007              }
2008            continue;
2009          }          }
2010        *q++ = c;        *q++ = c;
2011        }        }
# Line 1552  while (!done) Line 2036  while (!done)
2036    
2037        if (rc != 0)        if (rc != 0)
2038          {          {
2039          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2040          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2041          }          }
2042          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2043                  != 0)
2044            {
2045            fprintf(outfile, "Matched with REG_NOSUB\n");
2046            }
2047        else        else
2048          {          {
2049          size_t i;          size_t i;
# Line 1586  while (!done) Line 2075  while (!done)
2075    
2076      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2077        {        {
2078        if (timeit)        if (timeitm > 0)
2079          {          {
2080          register int i;          register int i;
2081          clock_t time_taken;          clock_t time_taken;
# Line 1596  while (!done) Line 2085  while (!done)
2085          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
2086            {            {
2087            int workspace[1000];            int workspace[1000];
2088            for (i = 0; i < LOOPREPEAT; i++)            for (i = 0; i < timeitm; i++)
2089              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2090                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
2091                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
# Line 1604  while (!done) Line 2093  while (!done)
2093          else          else
2094  #endif  #endif
2095    
2096          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeitm; i++)
2097            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2098              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2099    
2100          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2101          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2102            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
2103              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2104          }          }
2105    
2106        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
2107        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
2108          for the recursion limit. */
2109    
2110        if (find_match_limit)        if (find_match_limit)
2111          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
2112          if (extra == NULL)          if (extra == NULL)
2113            {            {
2114            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2115            extra->flags = 0;            extra->flags = 0;
2116            }            }
         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;  
2117    
2118          for (;;)          (void)check_match_limit(re, extra, bptr, len, start_offset,
2119            {            options|g_notempty, use_offsets, use_size_offsets,
2120            extra->match_limit = mid;            PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2121            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,            PCRE_ERROR_MATCHLIMIT, "match()");
2122              options | g_notempty, use_offsets, use_size_offsets);  
2123            if (count == PCRE_ERROR_MATCHLIMIT)          count = check_match_limit(re, extra, bptr, len, start_offset,
2124              {            options|g_notempty, use_offsets, use_size_offsets,
2125              /* fprintf(outfile, "Testing match limit = %d\n", mid); */            PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2126              min = mid;            PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||  
                                  count == PCRE_ERROR_PARTIAL)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
   
         extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;  
2127          }          }
2128    
2129        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1708  while (!done) Line 2175  while (!done)
2175    
2176        if (count >= 0)        if (count >= 0)
2177          {          {
2178          int i;          int i, maxcount;
2179    
2180    #if !defined NODFA
2181            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2182    #endif
2183              maxcount = use_size_offsets/3;
2184    
2185            /* This is a check against a lunatic return value. */
2186    
2187            if (count > maxcount)
2188              {
2189              fprintf(outfile,
2190                "** PCRE error: returned count %d is too big for offset size %d\n",
2191                count, use_size_offsets);
2192              count = use_size_offsets/3;
2193              if (do_g || do_G)
2194                {
2195                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2196                do_g = do_G = FALSE;        /* Break g/G loop */
2197                }
2198              }
2199    
2200          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2201            {            {
2202            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 1736  while (!done) Line 2224  while (!done)
2224            {            {
2225            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2226              {              {
2227              char copybuffer[16];              char copybuffer[256];
2228              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2229                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2230              if (rc < 0)              if (rc < 0)
# Line 1746  while (!done) Line 2234  while (!done)
2234              }              }
2235            }            }
2236    
2237            for (copynamesptr = copynames;
2238                 *copynamesptr != 0;
2239                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2240              {
2241              char copybuffer[256];
2242              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2243                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2244              if (rc < 0)
2245                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2246              else
2247                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2248              }
2249    
2250          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2251            {            {
2252            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1758  while (!done) Line 2259  while (!done)
2259              else              else
2260                {                {
2261                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2262                pcre_free_substring(substring);                pcre_free_substring(substring);
2263                }                }
2264              }              }
2265            }            }
2266    
2267            for (getnamesptr = getnames;
2268                 *getnamesptr != 0;
2269                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2270              {
2271              const char *substring;
2272              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2273                count, (char *)getnamesptr, &substring);
2274              if (rc < 0)
2275                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2276              else
2277                {
2278                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2279                pcre_free_substring(substring);
2280                }
2281              }
2282    
2283          if (getlist)          if (getlist)
2284            {            {
2285            const char **stringlist;            const char **stringlist;
# Line 1798  while (!done) Line 2314  while (!done)
2314          }          }
2315    
2316        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2317        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2318        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2319        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2320        offset values to achieve this. We won't be at the end of the string -  
2321        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any" or
2322          "anycrlf". If the previous match was at the end of a line terminated by
2323          CRLF, an advance of one character just passes the \r, whereas we should
2324          prefer the longer newline sequence, as does the code in pcre_exec().
2325          Fudge the offset value to achieve this.
2326    
2327          Otherwise, in the case of UTF-8 matching, the advance must be one
2328          character, not one byte. */
2329    
2330        else        else
2331          {          {
2332          if (g_notempty != 0)          if (g_notempty != 0)
2333            {            {
2334            int onechar = 1;            int onechar = 1;
2335              unsigned int obits = ((real_pcre *)re)->options;
2336            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2337            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2338                {
2339                int d;
2340                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2341                obits = (d == '\r')? PCRE_NEWLINE_CR :
2342                        (d == '\n')? PCRE_NEWLINE_LF :
2343                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2344                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2345                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2346                }
2347              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2348                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2349                  &&
2350                  start_offset < len - 1 &&
2351                  bptr[start_offset] == '\r' &&
2352                  bptr[start_offset+1] == '\n')
2353                onechar++;
2354              else if (use_utf8)
2355              {              {
2356              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2357                {                {
# Line 1845  while (!done) Line 2386  while (!done)
2386        character. */        character. */
2387    
2388        g_notempty = 0;        g_notempty = 0;
2389    
2390        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2391          {          {
2392          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
# Line 1863  while (!done) Line 2405  while (!done)
2405          len -= use_offsets[1];          len -= use_offsets[1];
2406          }          }
2407        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2408    
2409        NEXT_DATA: continue;
2410      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2411    
2412    CONTINUE:    CONTINUE:
# Line 1877  while (!done) Line 2421  while (!done)
2421      {      {
2422      new_free((void *)tables);      new_free((void *)tables);
2423      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2424        locale_set = 0;
2425      }      }
2426    }    }
2427    

Legend:
Removed from v.85  
changed lines
  Added in v.336

  ViewVC Help
Powered by ViewVC 1.1.5