/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 287 by ph10, Tue Dec 18 20:11:28 2007 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 48  POSSIBILITY OF SUCH DAMAGE.
48  #include <locale.h>  #include <locale.h>
49  #include <errno.h>  #include <errno.h>
50    
51  #define PCRE_SPY        /* For Win32 build, import data, not export */  #ifdef SUPPORT_LIBREADLINE
52    #include <unistd.h>
53    #include <readline/readline.h>
54    #include <readline/history.h>
55    #endif
56    
57    
58    /* A number of things vary for Windows builds. Originally, pcretest opened its
59    input and output without "b"; then I was told that "b" was needed in some
60    environments, so it was added for release 5.0 to both the input and output. (It
61    makes no difference on Unix-like systems.) Later I was told that it is wrong
62    for the input on Windows. I've now abstracted the modes into two macros that
63    are set here, to make it easier to fiddle with them, and removed "b" from the
64    input mode under Windows. */
65    
66    #if defined(_WIN32) || defined(WIN32)
67    #include <io.h>                /* For _setmode() */
68    #include <fcntl.h>             /* For _O_BINARY */
69    #define INPUT_MODE   "r"
70    #define OUTPUT_MODE  "wb"
71    
72    #else
73    #include <sys/time.h>          /* These two includes are needed */
74    #include <sys/resource.h>      /* for setrlimit(). */
75    #define INPUT_MODE   "rb"
76    #define OUTPUT_MODE  "wb"
77    #endif
78    
 /* We need the internal info for displaying the results of pcre_study() and  
 other internal data; pcretest also uses some of the fixed tables, and generally  
 has "inside information" compared to a program that strictly follows the PCRE  
 API. */  
79    
80    /* We have to include pcre_internal.h because we need the internal info for
81    displaying the results of pcre_study() and we also need to know about the
82    internal macros, structures, and other internal data values; pcretest has
83    "inside information" compared to a program that strictly follows the PCRE API.
84    
85    Although pcre_internal.h does itself include pcre.h, we explicitly include it
86    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
87    appropriately for an application, not for building PCRE. */
88    
89    #include "pcre.h"
90  #include "pcre_internal.h"  #include "pcre_internal.h"
91    
92    /* We need access to the data tables that PCRE uses. So as not to have to keep
93    two copies, we include the source file here, changing the names of the external
94    symbols to prevent clashes. */
95    
96    #define _pcre_utf8_table1      utf8_table1
97    #define _pcre_utf8_table1_size utf8_table1_size
98    #define _pcre_utf8_table2      utf8_table2
99    #define _pcre_utf8_table3      utf8_table3
100    #define _pcre_utf8_table4      utf8_table4
101    #define _pcre_utt              utt
102    #define _pcre_utt_size         utt_size
103    #define _pcre_utt_names        utt_names
104    #define _pcre_OP_lengths       OP_lengths
105    
106    #include "pcre_tables.c"
107    
108    /* We also need the pcre_printint() function for printing out compiled
109    patterns. This function is in a separate file so that it can be included in
110    pcre_compile.c when that module is compiled with debugging enabled.
111    
112    The definition of the macro PRINTABLE, which determines whether to print an
113    output character as-is or as a hex value when showing compiled patterns, is
114    contained in this file. We use it here also, in cases when the locale has not
115    been explicitly changed, so as to get consistent output from systems that
116    differ in their output from isprint() even in the "C" locale. */
117    
118    #include "pcre_printint.src"
119    
120    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
121    
122    
123  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
124  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 62  Makefile. */ Line 128  Makefile. */
128  #include "pcreposix.h"  #include "pcreposix.h"
129  #endif  #endif
130    
131    /* It is also possible, for the benefit of the version currently imported into
132    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
133    interface to the DFA matcher (NODFA), and without the doublecheck of the old
134    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
135    UTF8 support if PCRE is built without it. */
136    
137    #ifndef SUPPORT_UTF8
138    #ifndef NOUTF8
139    #define NOUTF8
140    #endif
141    #endif
142    
143    
144    /* Other parameters */
145    
146  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
147  #ifdef CLK_TCK  #ifdef CLK_TCK
148  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 70  Makefile. */ Line 151  Makefile. */
151  #endif  #endif
152  #endif  #endif
153    
154  #define LOOPREPEAT 500000  /* This is the default loop count for timing. */
155    
156  #define BUFFER_SIZE 30000  #define LOOPREPEAT 500000
 #define PBUFFER_SIZE BUFFER_SIZE  
 #define DBUFFER_SIZE BUFFER_SIZE  
157    
158    /* Static variables */
159    
160  static FILE *outfile;  static FILE *outfile;
161  static int log_store = 0;  static int log_store = 0;
# Line 83  static int callout_count; Line 163  static int callout_count;
163  static int callout_extra;  static int callout_extra;
164  static int callout_fail_count;  static int callout_fail_count;
165  static int callout_fail_id;  static int callout_fail_id;
166    static int debug_lengths;
167  static int first_callout;  static int first_callout;
168    static int locale_set = 0;
169  static int show_malloc;  static int show_malloc;
170  static int use_utf8;  static int use_utf8;
171  static size_t gotten_store;  static size_t gotten_store;
172    
173    /* The buffers grow automatically if very long input lines are encountered. */
174    
175    static int buffer_size = 50000;
176    static uschar *buffer = NULL;
177    static uschar *dbuffer = NULL;
178  static uschar *pbuffer = NULL;  static uschar *pbuffer = NULL;
179    
180    
181    
182  /*************************************************  /*************************************************
183    *        Read or extend an input line            *
184    *************************************************/
185    
186    /* Input lines are read into buffer, but both patterns and data lines can be
187    continued over multiple input lines. In addition, if the buffer fills up, we
188    want to automatically expand it so as to be able to handle extremely large
189    lines that are needed for certain stress tests. When the input buffer is
190    expanded, the other two buffers must also be expanded likewise, and the
191    contents of pbuffer, which are a copy of the input for callouts, must be
192    preserved (for when expansion happens for a data line). This is not the most
193    optimal way of handling this, but hey, this is just a test program!
194    
195    Arguments:
196      f            the file to read
197      start        where in buffer to start (this *must* be within buffer)
198      prompt       for stdin or readline()
199    
200    Returns:       pointer to the start of new data
201                   could be a copy of start, or could be moved
202                   NULL if no data read and EOF reached
203    */
204    
205    static uschar *
206    extend_inputline(FILE *f, uschar *start, const char *prompt)
207    {
208    uschar *here = start;
209    
210    for (;;)
211      {
212      int rlen = buffer_size - (here - buffer);
213    
214      if (rlen > 1000)
215        {
216        int dlen;
217    
218        /* If libreadline support is required, use readline() to read a line if the
219        input is a terminal. Note that readline() removes the trailing newline, so
220        we must put it back again, to be compatible with fgets(). */
221    
222    #ifdef SUPPORT_LIBREADLINE
223        if (isatty(fileno(f)))
224          {
225          size_t len;
226          char *s = readline(prompt);
227          if (s == NULL) return (here == start)? NULL : start;
228          len = strlen(s);
229          if (len > 0) add_history(s);
230          if (len > rlen - 1) len = rlen - 1;
231          memcpy(here, s, len);
232          here[len] = '\n';
233          here[len+1] = 0;
234          free(s);
235          }
236        else
237    #endif
238    
239        /* Read the next line by normal means, prompting if the file is stdin. */
240    
241          {
242          if (f == stdin) printf(prompt);
243          if (fgets((char *)here, rlen,  f) == NULL)
244            return (here == start)? NULL : start;
245          }
246    
247        dlen = (int)strlen((char *)here);
248        if (dlen > 0 && here[dlen - 1] == '\n') return start;
249        here += dlen;
250        }
251    
252      else
253        {
254        int new_buffer_size = 2*buffer_size;
255        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
256        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
257        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
258    
259        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
260          {
261          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
262          exit(1);
263          }
264    
265        memcpy(new_buffer, buffer, buffer_size);
266        memcpy(new_pbuffer, pbuffer, buffer_size);
267    
268        buffer_size = new_buffer_size;
269    
270        start = new_buffer + (start - buffer);
271        here = new_buffer + (here - buffer);
272    
273        free(buffer);
274        free(dbuffer);
275        free(pbuffer);
276    
277        buffer = new_buffer;
278        dbuffer = new_dbuffer;
279        pbuffer = new_pbuffer;
280        }
281      }
282    
283    return NULL;  /* Control never gets here */
284    }
285    
286    
287    
288    
289    
290    
291    
292    /*************************************************
293  *          Read number from string               *  *          Read number from string               *
294  *************************************************/  *************************************************/
295    
296  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
297  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
298  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
299    
300  Arguments:  Arguments:
301    str           string to be converted    str           string to be converted
# Line 128  return(result); Line 325  return(result);
325  and returns the value of the character.  and returns the value of the character.
326    
327  Argument:  Argument:
328    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
329    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
330    
331  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
332             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
333  */  */
334    
335    #if !defined NOUTF8
336    
337  static int  static int
338  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *utf8bytes, int *vptr)
339  {  {
340  int c = *buffer++;  int c = *utf8bytes++;
341  int d = c;  int d = c;
342  int i, j, s;  int i, j, s;
343    
# Line 154  if (i == 0 || i == 6) return 0;        / Line 353  if (i == 0 || i == 6) return 0;        /
353  /* i now has a value in the range 1-5 */  /* i now has a value in the range 1-5 */
354    
355  s = 6*i;  s = 6*i;
356  d = (c & _pcre_utf8_table3[i]) << s;  d = (c & utf8_table3[i]) << s;
357    
358  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
359    {    {
360    c = *buffer++;    c = *utf8bytes++;
361    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
362    s -= 6;    s -= 6;
363    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 166  for (j = 0; j < i; j++) Line 365  for (j = 0; j < i; j++)
365    
366  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
367    
368  for (j = 0; j < _pcre_utf8_table1_size; j++)  for (j = 0; j < utf8_table1_size; j++)
369    if (d <= _pcre_utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
370  if (j != i) return -(i+1);  if (j != i) return -(i+1);
371    
372  /* Valid value */  /* Valid value */
# Line 176  if (j != i) return -(i+1); Line 375  if (j != i) return -(i+1);
375  return i+1;  return i+1;
376  }  }
377    
378    #endif
379    
380    
381    
382    /*************************************************
383    *       Convert character value to UTF-8         *
384    *************************************************/
385    
386    /* This function takes an integer value in the range 0 - 0x7fffffff
387    and encodes it as a UTF-8 character in 1 to 6 bytes.
388    
389    Arguments:
390      cvalue     the character value
391      utf8bytes  pointer to buffer for result - at least 6 bytes long
392    
393    Returns:     number of characters placed in the buffer
394    */
395    
396    #if !defined NOUTF8
397    
398    static int
399    ord2utf8(int cvalue, uschar *utf8bytes)
400    {
401    register int i, j;
402    for (i = 0; i < utf8_table1_size; i++)
403      if (cvalue <= utf8_table1[i]) break;
404    utf8bytes += i;
405    for (j = i; j > 0; j--)
406     {
407     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
408     cvalue >>= 6;
409     }
410    *utf8bytes = utf8_table2[i] | cvalue;
411    return i + 1;
412    }
413    
414    #endif
415    
416    
417    
418  /*************************************************  /*************************************************
# Line 188  chars without printing. */ Line 425  chars without printing. */
425    
426  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(unsigned char *p, int length, FILE *f)
427  {  {
428  int c;  int c = 0;
429  int yield = 0;  int yield = 0;
430    
431  while (length-- > 0)  while (length-- > 0)
432    {    {
433    #if !defined NOUTF8
434    if (use_utf8)    if (use_utf8)
435      {      {
436      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
# Line 201  while (length-- > 0) Line 439  while (length-- > 0)
439        {        {
440        length -= rc - 1;        length -= rc - 1;
441        p += rc;        p += rc;
442        if (c < 256 && isprint(c))        if (PRINTHEX(c))
443          {          {
444          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
445          yield++;          yield++;
446          }          }
447        else        else
448          {          {
449          int n;          int n = 4;
450          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
451          yield += n;          yield += (n <= 0x000000ff)? 2 :
452                     (n <= 0x00000fff)? 3 :
453                     (n <= 0x0000ffff)? 4 :
454                     (n <= 0x000fffff)? 5 : 6;
455          }          }
456        continue;        continue;
457        }        }
458      }      }
459    #endif
460    
461     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
462    
463    if (isprint(c = *(p++)))    c = *p++;
464      if (PRINTHEX(c))
465      {      {
466      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
467      yield++;      yield++;
# Line 392  if ((rc = pcre_fullinfo(re, study, optio Line 635  if ((rc = pcre_fullinfo(re, study, optio
635  *         Byte flipping function                 *  *         Byte flipping function                 *
636  *************************************************/  *************************************************/
637    
638  static long int  static unsigned long int
639  byteflip(long int value, int n)  byteflip(unsigned long int value, int n)
640  {  {
641  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
642  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
# Line 406  return ((value & 0x000000ff) << 24) | Line 649  return ((value & 0x000000ff) << 24) |
649    
650    
651  /*************************************************  /*************************************************
652    *        Check match or recursion limit          *
653    *************************************************/
654    
655    static int
656    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
657      int start_offset, int options, int *use_offsets, int use_size_offsets,
658      int flag, unsigned long int *limit, int errnumber, const char *msg)
659    {
660    int count;
661    int min = 0;
662    int mid = 64;
663    int max = -1;
664    
665    extra->flags |= flag;
666    
667    for (;;)
668      {
669      *limit = mid;
670    
671      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
672        use_offsets, use_size_offsets);
673    
674      if (count == errnumber)
675        {
676        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
677        min = mid;
678        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
679        }
680    
681      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
682                             count == PCRE_ERROR_PARTIAL)
683        {
684        if (mid == min + 1)
685          {
686          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
687          break;
688          }
689        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
690        max = mid;
691        mid = (min + mid)/2;
692        }
693      else break;    /* Some other error */
694      }
695    
696    extra->flags &= ~flag;
697    return count;
698    }
699    
700    
701    
702    /*************************************************
703    *         Case-independent strncmp() function    *
704    *************************************************/
705    
706    /*
707    Arguments:
708      s         first string
709      t         second string
710      n         number of characters to compare
711    
712    Returns:    < 0, = 0, or > 0, according to the comparison
713    */
714    
715    static int
716    strncmpic(uschar *s, uschar *t, int n)
717    {
718    while (n--)
719      {
720      int c = tolower(*s++) - tolower(*t++);
721      if (c) return c;
722      }
723    return 0;
724    }
725    
726    
727    
728    /*************************************************
729    *         Check newline indicator                *
730    *************************************************/
731    
732    /* This is used both at compile and run-time to check for <xxx> escapes, where
733    xxx is LF, CR, CRLF, ANYCRLF, ANY, BSR_ANYCRLF, or BSR_UNICODE. Print a message
734    and return 0 if there is no match.
735    
736    Arguments:
737      p           points after the leading '<'
738      f           file for error message
739    
740    Returns:      appropriate PCRE_NEWLINE_xxx or PCRE_BSR_xxx flag, or 0
741    */
742    
743    static int
744    check_newline(uschar *p, FILE *f)
745    {
746    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
747    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
748    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
749    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
750    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
751    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
752    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
753    fprintf(f, "Unknown newline type at: <%s\n", p);
754    return 0;
755    }
756    
757    
758    
759    /*************************************************
760    *             Usage function                     *
761    *************************************************/
762    
763    static void
764    usage(void)
765    {
766    printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
767    printf("Input and output default to stdin and stdout.\n");
768    #ifdef SUPPORT_LIBREADLINE
769    printf("If input is a terminal, readline() is used to read from it.\n");
770    #else
771    printf("This version of pcretest is not linked with readline().\n");
772    #endif
773    printf("\nOptions:\n");
774    printf("  -b       show compiled code (bytecode)\n");
775    printf("  -C       show PCRE compile-time options and exit\n");
776    printf("  -d       debug: show compiled code and information (-b and -i)\n");
777    #if !defined NODFA
778    printf("  -dfa     force DFA matching for all subjects\n");
779    #endif
780    printf("  -help    show usage information\n");
781    printf("  -i       show information about compiled patterns\n"
782           "  -m       output memory used information\n"
783           "  -o <n>   set size of offsets vector to <n>\n");
784    #if !defined NOPOSIX
785    printf("  -p       use POSIX interface\n");
786    #endif
787    printf("  -q       quiet: do not output PCRE version number at start\n");
788    printf("  -S <n>   set stack size to <n> megabytes\n");
789    printf("  -s       output store (memory) used information\n"
790           "  -t       time compilation and execution\n");
791    printf("  -t <n>   time compilation and execution, repeating <n> times\n");
792    printf("  -tm      time execution (matching) only\n");
793    printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
794    }
795    
796    
797    
798    /*************************************************
799  *                Main Program                    *  *                Main Program                    *
800  *************************************************/  *************************************************/
801    
# Line 420  int options = 0; Line 810  int options = 0;
810  int study_options = 0;  int study_options = 0;
811  int op = 1;  int op = 1;
812  int timeit = 0;  int timeit = 0;
813    int timeitm = 0;
814  int showinfo = 0;  int showinfo = 0;
815  int showstore = 0;  int showstore = 0;
816    int quiet = 0;
817  int size_offsets = 45;  int size_offsets = 45;
818  int size_offsets_max;  int size_offsets_max;
819  int *offsets = NULL;  int *offsets = NULL;
# Line 432  int debug = 0; Line 824  int debug = 0;
824  int done = 0;  int done = 0;
825  int all_use_dfa = 0;  int all_use_dfa = 0;
826  int yield = 0;  int yield = 0;
827    int stack_size;
828    
829    /* These vectors store, end-to-end, a list of captured substring names. Assume
830    that 1024 is plenty long enough for the few names we'll be testing. */
831    
832  unsigned char *buffer;  uschar copynames[1024];
833  unsigned char *dbuffer;  uschar getnames[1024];
834    
835    uschar *copynamesptr;
836    uschar *getnamesptr;
837    
838  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that Electric Fence will check their misuse
839  when I am debugging. */  when I am debugging. They grow automatically when very long lines are read. */
840    
841  buffer = (unsigned char *)malloc(BUFFER_SIZE);  buffer = (unsigned char *)malloc(buffer_size);
842  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);  dbuffer = (unsigned char *)malloc(buffer_size);
843  pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);  pbuffer = (unsigned char *)malloc(buffer_size);
   
 /* The outfile variable is static so that new_malloc can use it. The _setmode()  
 stuff is some magic that I don't understand, but which apparently does good  
 things in Windows. It's related to line terminations.  */  
844    
845  #if defined(_WIN32) || defined(WIN32)  /* The outfile variable is static so that new_malloc can use it. */
 _setmode( _fileno( stdout ), 0x8000 );  
 #endif  /* defined(_WIN32) || defined(WIN32) */  
846    
847  outfile = stdout;  outfile = stdout;
848    
849    /* The following  _setmode() stuff is some Windows magic that puts stdout into
850    binary mode, so that LF is not written as CRLF. At least, that's what
851    I've been told: never having used Windows I take this all on trust. Originally
852    it set 0x8000, but then I was advised that _O_BINARY was better. */
853    
854    #if defined(_WIN32) || defined(WIN32)
855    _setmode( _fileno( stdout ), _O_BINARY );
856    #endif
857    
858  /* Scan options */  /* Scan options */
859    
860  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
# Line 461  while (argc > 1 && argv[op][0] == '-') Line 863  while (argc > 1 && argv[op][0] == '-')
863    
864    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
865      showstore = 1;      showstore = 1;
866    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
867      else if (strcmp(argv[op], "-b") == 0) debug = 1;
868    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
869    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
870    #if !defined NODFA
871    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
872    #endif
873    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
874        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
875          *endptr == 0))          *endptr == 0))
# Line 472  while (argc > 1 && argv[op][0] == '-') Line 877  while (argc > 1 && argv[op][0] == '-')
877      op++;      op++;
878      argc--;      argc--;
879      }      }
880      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
881        {
882        int both = argv[op][2] == 0;
883        int temp;
884        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
885                         *endptr == 0))
886          {
887          timeitm = temp;
888          op++;
889          argc--;
890          }
891        else timeitm = LOOPREPEAT;
892        if (both) timeit = timeitm;
893        }
894      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
895          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
896            *endptr == 0))
897        {
898    #if defined(_WIN32) || defined(WIN32)
899        printf("PCRE: -S not supported on this OS\n");
900        exit(1);
901    #else
902        int rc;
903        struct rlimit rlim;
904        getrlimit(RLIMIT_STACK, &rlim);
905        rlim.rlim_cur = stack_size * 1024 * 1024;
906        rc = setrlimit(RLIMIT_STACK, &rlim);
907        if (rc != 0)
908          {
909        printf("PCRE: setrlimit() failed with error %d\n", rc);
910        exit(1);
911          }
912        op++;
913        argc--;
914    #endif
915        }
916  #if !defined NOPOSIX  #if !defined NOPOSIX
917    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
918  #endif  #endif
# Line 485  while (argc > 1 && argv[op][0] == '-') Line 926  while (argc > 1 && argv[op][0] == '-')
926      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
927      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
928      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
929      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
930          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
931          (rc == -2)? "ANYCRLF" :
932          (rc == -1)? "ANY" : "???");
933        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
934        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
935                                         "all Unicode newlines");
936      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
937      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
938      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
939      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
940      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
941      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %d\n", rc);
942        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
943        printf("  Default recursion depth limit = %d\n", rc);
944      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
945      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
946      exit(0);      goto EXIT;
947        }
948      else if (strcmp(argv[op], "-help") == 0 ||
949               strcmp(argv[op], "--help") == 0)
950        {
951        usage();
952        goto EXIT;
953      }      }
954    else    else
955      {      {
956      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
957      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
     printf("  -C     show PCRE compile-time options and exit\n");  
     printf("  -d     debug: show compiled code; implies -i\n");  
     printf("  -dfa   force DFA matching for all subjects\n");  
     printf("  -i     show information about compiled pattern\n"  
            "  -m     output memory used information\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store (memory) used information\n"  
            "  -t     time compilation and execution\n");  
958      yield = 1;      yield = 1;
959      goto EXIT;      goto EXIT;
960      }      }
# Line 525  offsets = (int *)malloc(size_offsets_max Line 969  offsets = (int *)malloc(size_offsets_max
969  if (offsets == NULL)  if (offsets == NULL)
970    {    {
971    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
972      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
973    yield = 1;    yield = 1;
974    goto EXIT;    goto EXIT;
975    }    }
# Line 534  if (offsets == NULL) Line 978  if (offsets == NULL)
978    
979  if (argc > 1)  if (argc > 1)
980    {    {
981    infile = fopen(argv[op], "rb");    infile = fopen(argv[op], INPUT_MODE);
982    if (infile == NULL)    if (infile == NULL)
983      {      {
984      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
# Line 545  if (argc > 1) Line 989  if (argc > 1)
989    
990  if (argc > 2)  if (argc > 2)
991    {    {
992    outfile = fopen(argv[op+1], "wb");    outfile = fopen(argv[op+1], OUTPUT_MODE);
993    if (outfile == NULL)    if (outfile == NULL)
994      {      {
995      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
# Line 561  pcre_free = new_free; Line 1005  pcre_free = new_free;
1005  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
1006  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
1007    
1008  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1009    
1010  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1011    
1012  /* Main loop */  /* Main loop */
1013    
# Line 590  while (!done) Line 1034  while (!done)
1034    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1035    int do_showrest = 0;    int do_showrest = 0;
1036    int do_flip = 0;    int do_flip = 0;
1037    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
1038    
1039    use_utf8 = 0;    use_utf8 = 0;
1040      debug_lengths = 1;
1041    
1042    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;  
1043    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1044    fflush(outfile);    fflush(outfile);
1045    
# Line 607  while (!done) Line 1051  while (!done)
1051    
1052    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1053      {      {
1054      unsigned long int magic;      unsigned long int magic, get_options;
1055      uschar sbuf[8];      uschar sbuf[8];
1056      FILE *f;      FILE *f;
1057    
# Line 655  while (!done) Line 1099  while (!done)
1099    
1100      /* Need to know if UTF-8 for printing data strings */      /* Need to know if UTF-8 for printing data strings */
1101    
1102      new_info(re, NULL, PCRE_INFO_OPTIONS, &options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1103      use_utf8 = (options & PCRE_UTF8) != 0;      use_utf8 = (get_options & PCRE_UTF8) != 0;
1104    
1105      /* Now see if there is any following study data */      /* Now see if there is any following study data */
1106    
# Line 695  while (!done) Line 1139  while (!done)
1139    
1140    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1141      {      {
1142      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1143      goto SKIP_DATA;      goto SKIP_DATA;
1144      }      }
1145    
1146    pp = p;    pp = p;
1147      poffset = p - buffer;
1148    
1149    for(;;)    for(;;)
1150      {      {
# Line 710  while (!done) Line 1155  while (!done)
1155        pp++;        pp++;
1156        }        }
1157      if (*pp != 0) break;      if (*pp != 0) break;
1158        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = BUFFER_SIZE - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
1159        {        {
1160        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1161        done = 1;        done = 1;
# Line 728  while (!done) Line 1164  while (!done)
1164      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1165      }      }
1166    
1167      /* The buffer may have moved while being extended; reset the start of data
1168      pointer to the correct relative point in the buffer. */
1169    
1170      p = buffer + poffset;
1171    
1172    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1173    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1174    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
# Line 759  while (!done) Line 1200  while (!done)
1200    
1201        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1202        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1203          case 'B': do_debug = 1; break;
1204        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
1205        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1206        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1207        case 'F': do_flip = 1; break;        case 'F': do_flip = 1; break;
1208        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1209        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1210          case 'J': options |= PCRE_DUPNAMES; break;
1211        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1212        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1213    
# Line 775  while (!done) Line 1218  while (!done)
1218        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1219        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1220        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1221          case 'Z': debug_lengths = 0; break;
1222        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1223        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1224    
1225        case 'L':        case 'L':
1226        ppp = pp;        ppp = pp;
1227        /* The '\r' test here is so that it works on Windows */        /* The '\r' test here is so that it works on Windows. */
1228        while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;        /* The '0' test is just in case this is an unterminated line. */
1229          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1230        *ppp = 0;        *ppp = 0;
1231        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1232          {          {
1233          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1234          goto SKIP_DATA;          goto SKIP_DATA;
1235          }          }
1236          locale_set = 1;
1237        tables = pcre_maketables();        tables = pcre_maketables();
1238        pp = ppp;        pp = ppp;
1239        break;        break;
# Line 799  while (!done) Line 1245  while (!done)
1245        *pp = 0;        *pp = 0;
1246        break;        break;
1247    
1248          case '<':
1249            {
1250            int x = check_newline(pp, outfile);
1251            if (x == 0) goto SKIP_DATA;
1252            options |= x;
1253            while (*pp++ != '>');
1254            }
1255          break;
1256    
1257        case '\r':                      /* So that it works in Windows */        case '\r':                      /* So that it works in Windows */
1258        case '\n':        case '\n':
1259        case ' ':        case ' ':
# Line 823  while (!done) Line 1278  while (!done)
1278      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1279      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1280      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1281        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1282        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1283    
1284      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1285    
1286      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 830  while (!done) Line 1288  while (!done)
1288    
1289      if (rc != 0)      if (rc != 0)
1290        {        {
1291        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1292        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1293        goto SKIP_DATA;        goto SKIP_DATA;
1294        }        }
# Line 842  while (!done) Line 1300  while (!done)
1300  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1301    
1302      {      {
1303      if (timeit)      if (timeit > 0)
1304        {        {
1305        register int i;        register int i;
1306        clock_t time_taken;        clock_t time_taken;
1307        clock_t start_time = clock();        clock_t start_time = clock();
1308        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1309          {          {
1310          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1311          if (re != NULL) free(re);          if (re != NULL) free(re);
1312          }          }
1313        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1314        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1315          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1316            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1317        }        }
1318    
# Line 871  while (!done) Line 1329  while (!done)
1329          {          {
1330          for (;;)          for (;;)
1331            {            {
1332            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1333              {              {
1334              done = 1;              done = 1;
1335              goto CONTINUE;              goto CONTINUE;
# Line 906  while (!done) Line 1364  while (!done)
1364    
1365      if (do_study)      if (do_study)
1366        {        {
1367        if (timeit)        if (timeit > 0)
1368          {          {
1369          register int i;          register int i;
1370          clock_t time_taken;          clock_t time_taken;
1371          clock_t start_time = clock();          clock_t start_time = clock();
1372          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
1373            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
1374          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1375          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
1376          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
1377            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeit) /
1378              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
1379          }          }
1380        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 934  while (!done) Line 1392  while (!done)
1392      if (do_flip)      if (do_flip)
1393        {        {
1394        real_pcre *rre = (real_pcre *)re;        real_pcre *rre = (real_pcre *)re;
1395        rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));        rre->magic_number =
1396            byteflip(rre->magic_number, sizeof(rre->magic_number));
1397        rre->size = byteflip(rre->size, sizeof(rre->size));        rre->size = byteflip(rre->size, sizeof(rre->size));
1398        rre->options = byteflip(rre->options, sizeof(rre->options));        rre->options = byteflip(rre->options, sizeof(rre->options));
1399        rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));        rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1400        rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));        rre->top_bracket =
1401        rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1402        rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));        rre->top_backref =
1403        rre->name_table_offset = byteflip(rre->name_table_offset,          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1404          rre->first_byte =
1405            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1406          rre->req_byte =
1407            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1408          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1409          sizeof(rre->name_table_offset));          sizeof(rre->name_table_offset));
1410        rre->name_entry_size = byteflip(rre->name_entry_size,        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1411          sizeof(rre->name_entry_size));          sizeof(rre->name_entry_size));
1412        rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));        rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1413            sizeof(rre->name_count));
1414    
1415        if (extra != NULL)        if (extra != NULL)
1416          {          {
# Line 959  while (!done) Line 1424  while (!done)
1424    
1425      SHOW_INFO:      SHOW_INFO:
1426    
1427        if (do_debug)
1428          {
1429          fprintf(outfile, "------------------------------------------------------------------\n");
1430          pcre_printint(re, outfile, debug_lengths);
1431          }
1432    
1433      if (do_showinfo)      if (do_showinfo)
1434        {        {
1435        unsigned long int get_options, all_options;        unsigned long int get_options, all_options;
1436    #if !defined NOINFOCHECK
1437        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1438        int count, backrefmax, first_char, need_char;  #endif
1439          int count, backrefmax, first_char, need_char, okpartial, jchanged,
1440            hascrorlf;
1441        int nameentrysize, namecount;        int nameentrysize, namecount;
1442        const uschar *nametable;        const uschar *nametable;
1443    
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         _pcre_printint(re, outfile);  
         }  
   
1444        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1445        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1446        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
# Line 982  while (!done) Line 1450  while (!done)
1450        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1451        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1452        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1453          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1454          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1455          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1456    
1457    #if !defined NOINFOCHECK
1458        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1459        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1460          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 1000  while (!done) Line 1472  while (!done)
1472            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1473              get_options, old_options);              get_options, old_options);
1474          }          }
1475    #endif
1476    
1477        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1478          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 1021  while (!done) Line 1494  while (!done)
1494            }            }
1495          }          }
1496    
1497        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1498        to fish it out via out back door */        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1499    
1500        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1501        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
         }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
1502    
1503        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1504          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1505            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1506            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1507            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1508            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1509            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1510            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1511              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1512              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1513            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1514            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1515            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1516              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1517            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1518            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1519              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1520    
1521          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1522    
1523          switch (get_options & PCRE_NEWLINE_BITS)
1524            {
1525            case PCRE_NEWLINE_CR:
1526            fprintf(outfile, "Forced newline sequence: CR\n");
1527            break;
1528    
1529        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)          case PCRE_NEWLINE_LF:
1530          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Forced newline sequence: LF\n");
1531            break;
1532    
1533            case PCRE_NEWLINE_CRLF:
1534            fprintf(outfile, "Forced newline sequence: CRLF\n");
1535            break;
1536    
1537            case PCRE_NEWLINE_ANYCRLF:
1538            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1539            break;
1540    
1541            case PCRE_NEWLINE_ANY:
1542            fprintf(outfile, "Forced newline sequence: ANY\n");
1543            break;
1544    
1545            default:
1546            break;
1547            }
1548    
1549        if (first_char == -1)        if (first_char == -1)
1550          {          {
1551          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1552          }          }
1553        else if (first_char < 0)        else if (first_char < 0)
1554          {          {
# Line 1063  while (!done) Line 1559  while (!done)
1559          int ch = first_char & 255;          int ch = first_char & 255;
1560          const char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1561            "" : " (caseless)";            "" : " (caseless)";
1562          if (isprint(ch))          if (PRINTHEX(ch))
1563            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1564          else          else
1565            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 1078  while (!done) Line 1574  while (!done)
1574          int ch = need_char & 255;          int ch = need_char & 255;
1575          const char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1576            "" : " (caseless)";            "" : " (caseless)";
1577          if (isprint(ch))          if (PRINTHEX(ch))
1578            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1579          else          else
1580            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
# Line 1114  while (!done) Line 1610  while (!done)
1610                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
1611                    c = 2;                    c = 2;
1612                    }                    }
1613                  if (isprint(i) && i != ' ')                  if (PRINTHEX(i) && i != ' ')
1614                    {                    {
1615                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
1616                    c += 2;                    c += 2;
# Line 1146  while (!done) Line 1642  while (!done)
1642        else        else
1643          {          {
1644          uschar sbuf[8];          uschar sbuf[8];
1645          sbuf[0] = (true_size >> 24)  & 255;          sbuf[0] = (uschar)((true_size >> 24) & 255);
1646          sbuf[1] = (true_size >> 16)  & 255;          sbuf[1] = (uschar)((true_size >> 16) & 255);
1647          sbuf[2] = (true_size >>  8)  & 255;          sbuf[2] = (uschar)((true_size >>  8) & 255);
1648          sbuf[3] = (true_size)  & 255;          sbuf[3] = (uschar)((true_size) & 255);
1649    
1650          sbuf[4] = (true_study_size >> 24)  & 255;          sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1651          sbuf[5] = (true_study_size >> 16)  & 255;          sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1652          sbuf[6] = (true_study_size >>  8)  & 255;          sbuf[6] = (uschar)((true_study_size >>  8) & 255);
1653          sbuf[7] = (true_study_size)  & 255;          sbuf[7] = (uschar)((true_study_size) & 255);
1654    
1655          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
1656              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1173  while (!done) Line 1669  while (!done)
1669                  strerror(errno));                  strerror(errno));
1670                }                }
1671              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
1672    
1673              }              }
1674            }            }
1675          fclose(f);          fclose(f);
# Line 1189  while (!done) Line 1686  while (!done)
1686    
1687    for (;;)    for (;;)
1688      {      {
1689      unsigned char *q;      uschar *q;
1690      unsigned char *bptr = dbuffer;      uschar *bptr;
1691      int *use_offsets = offsets;      int *use_offsets = offsets;
1692      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1693      int callout_data = 0;      int callout_data = 0;
# Line 1207  while (!done) Line 1704  while (!done)
1704    
1705      options = 0;      options = 0;
1706    
1707        *copynames = 0;
1708        *getnames = 0;
1709    
1710        copynamesptr = copynames;
1711        getnamesptr = getnames;
1712    
1713      pcre_callout = callout;      pcre_callout = callout;
1714      first_callout = 1;      first_callout = 1;
1715      callout_extra = 0;      callout_extra = 0;
# Line 1215  while (!done) Line 1718  while (!done)
1718      callout_fail_id = -1;      callout_fail_id = -1;
1719      show_malloc = 0;      show_malloc = 0;
1720    
1721      if (infile == stdin) printf("data> ");      if (extra != NULL) extra->flags &=
1722      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1723    
1724        len = 0;
1725        for (;;)
1726        {        {
1727        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1728        goto CONTINUE;          {
1729            if (len > 0) break;
1730            done = 1;
1731            goto CONTINUE;
1732            }
1733          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1734          len = (int)strlen((char *)buffer);
1735          if (buffer[len-1] == '\n') break;
1736        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1737    
     len = (int)strlen((char *)buffer);  
1738      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1739      buffer[len] = 0;      buffer[len] = 0;
1740      if (len == 0) break;      if (len == 0) break;
# Line 1231  while (!done) Line 1742  while (!done)
1742      p = buffer;      p = buffer;
1743      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1744    
1745      q = dbuffer;      bptr = q = dbuffer;
1746      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1747        {        {
1748        int i = 0;        int i = 0;
# Line 1253  while (!done) Line 1764  while (!done)
1764          c -= '0';          c -= '0';
1765          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1766            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1767    
1768    #if !defined NOUTF8
1769            if (use_utf8 && c > 255)
1770              {
1771              unsigned char buff8[8];
1772              int ii, utn;
1773              utn = ord2utf8(c, buff8);
1774              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1775              c = buff8[ii];   /* Last byte */
1776              }
1777    #endif
1778          break;          break;
1779    
1780          case 'x':          case 'x':
1781    
1782          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
1783    
1784    #if !defined NOUTF8
1785          if (*p == '{')          if (*p == '{')
1786            {            {
1787            unsigned char *pt = p;            unsigned char *pt = p;
# Line 1269  while (!done) Line 1792  while (!done)
1792              {              {
1793              unsigned char buff8[8];              unsigned char buff8[8];
1794              int ii, utn;              int ii, utn;
1795              utn = _pcre_ord2utf8(c, buff8);              utn = ord2utf8(c, buff8);
1796              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1797              c = buff8[ii];   /* Last byte */              c = buff8[ii];   /* Last byte */
1798              p = pt + 1;              p = pt + 1;
# Line 1277  while (!done) Line 1800  while (!done)
1800              }              }
1801            /* Not correct form; fall through */            /* Not correct form; fall through */
1802            }            }
1803    #endif
1804    
1805          /* Ordinary \x */          /* Ordinary \x */
1806    
# Line 1312  while (!done) Line 1836  while (!done)
1836            }            }
1837          else if (isalnum(*p))          else if (isalnum(*p))
1838            {            {
1839            uschar name[256];            uschar *npp = copynamesptr;
           uschar *npp = name;  
1840            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1841              *npp++ = 0;
1842            *npp = 0;            *npp = 0;
1843            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
1844            if (n < 0)            if (n < 0)
1845              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1846            else copystrings |= 1 << n;            copynamesptr = npp;
1847            }            }
1848          else if (*p == '+')          else if (*p == '+')
1849            {            {
# Line 1357  while (!done) Line 1881  while (!done)
1881            }            }
1882          continue;          continue;
1883    
1884    #if !defined NODFA
1885          case 'D':          case 'D':
1886    #if !defined NOPOSIX
1887          if (posix || do_posix)          if (posix || do_posix)
1888            printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");            printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1889          else          else
1890    #endif
1891            use_dfa = 1;            use_dfa = 1;
1892          continue;          continue;
1893    
1894          case 'F':          case 'F':
1895          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
1896          continue;          continue;
1897    #endif
1898    
1899          case 'G':          case 'G':
1900          if (isdigit(*p))          if (isdigit(*p))
# Line 1376  while (!done) Line 1904  while (!done)
1904            }            }
1905          else if (isalnum(*p))          else if (isalnum(*p))
1906            {            {
1907            uschar name[256];            uschar *npp = getnamesptr;
           uschar *npp = name;  
1908            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1909              *npp++ = 0;
1910            *npp = 0;            *npp = 0;
1911            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
1912            if (n < 0)            if (n < 0)
1913              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1914            else getstrings |= 1 << n;            getnamesptr = npp;
1915            }            }
1916          continue;          continue;
1917    
# Line 1409  while (!done) Line 1937  while (!done)
1937            if (offsets == NULL)            if (offsets == NULL)
1938              {              {
1939              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1940                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1941              yield = 1;              yield = 1;
1942              goto EXIT;              goto EXIT;
1943              }              }
# Line 1422  while (!done) Line 1950  while (!done)
1950          options |= PCRE_PARTIAL;          options |= PCRE_PARTIAL;
1951          continue;          continue;
1952    
1953            case 'Q':
1954            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1955            if (extra == NULL)
1956              {
1957              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1958              extra->flags = 0;
1959              }
1960            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1961            extra->match_limit_recursion = n;
1962            continue;
1963    
1964            case 'q':
1965            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1966            if (extra == NULL)
1967              {
1968              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1969              extra->flags = 0;
1970              }
1971            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1972            extra->match_limit = n;
1973            continue;
1974    
1975    #if !defined NODFA
1976          case 'R':          case 'R':
1977          options |= PCRE_DFA_RESTART;          options |= PCRE_DFA_RESTART;
1978          continue;          continue;
1979    #endif
1980    
1981          case 'S':          case 'S':
1982          show_malloc = 1;          show_malloc = 1;
# Line 1437  while (!done) Line 1989  while (!done)
1989          case '?':          case '?':
1990          options |= PCRE_NO_UTF8_CHECK;          options |= PCRE_NO_UTF8_CHECK;
1991          continue;          continue;
1992    
1993            case '<':
1994              {
1995              int x = check_newline(p, outfile);
1996              if (x == 0) goto NEXT_DATA;
1997              options |= x;
1998              while (*p++ != '>');
1999              }
2000            continue;
2001          }          }
2002        *q++ = c;        *q++ = c;
2003        }        }
# Line 1467  while (!done) Line 2028  while (!done)
2028    
2029        if (rc != 0)        if (rc != 0)
2030          {          {
2031          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2032          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2033          }          }
2034          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2035                  != 0)
2036            {
2037            fprintf(outfile, "Matched with REG_NOSUB\n");
2038            }
2039        else        else
2040          {          {
2041          size_t i;          size_t i;
# Line 1501  while (!done) Line 2067  while (!done)
2067    
2068      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2069        {        {
2070        if (timeit)        if (timeitm > 0)
2071          {          {
2072          register int i;          register int i;
2073          clock_t time_taken;          clock_t time_taken;
2074          clock_t start_time = clock();          clock_t start_time = clock();
2075    
2076    #if !defined NODFA
2077          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
2078            {            {
2079            int workspace[1000];            int workspace[1000];
2080            for (i = 0; i < LOOPREPEAT; i++)            for (i = 0; i < timeitm; i++)
2081              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2082                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
2083                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
2084            }            }
2085          else          else
2086    #endif
2087    
2088          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeitm; i++)
2089            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2090              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2091    
2092          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2093          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2094            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
2095              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2096          }          }
2097    
2098        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
2099        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
2100          for the recursion limit. */
2101    
2102        if (find_match_limit)        if (find_match_limit)
2103          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
2104          if (extra == NULL)          if (extra == NULL)
2105            {            {
2106            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2107            extra->flags = 0;            extra->flags = 0;
2108            }            }
         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;  
   
         for (;;)  
           {  
           extra->match_limit = mid;  
           count = pcre_exec(re, extra, (char *)bptr, len, start_offset,  
             options | g_notempty, use_offsets, use_size_offsets);  
           if (count == PCRE_ERROR_MATCHLIMIT)  
             {  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             min = mid;  
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||  
                                  count == PCRE_ERROR_PARTIAL)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
2109    
2110          extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;          (void)check_match_limit(re, extra, bptr, len, start_offset,
2111              options|g_notempty, use_offsets, use_size_offsets,
2112              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2113              PCRE_ERROR_MATCHLIMIT, "match()");
2114    
2115            count = check_match_limit(re, extra, bptr, len, start_offset,
2116              options|g_notempty, use_offsets, use_size_offsets,
2117              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2118              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2119          }          }
2120    
2121        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1591  while (!done) Line 2137  while (!done)
2137        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
2138        value of match_limit. */        value of match_limit. */
2139    
2140    #if !defined NODFA
2141        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
2142          {          {
2143          int workspace[1000];          int workspace[1000];
# Line 1603  while (!done) Line 2150  while (!done)
2150            count = use_size_offsets/2;            count = use_size_offsets/2;
2151            }            }
2152          }          }
2153    #endif
2154    
2155        else        else
2156          {          {
# Line 1619  while (!done) Line 2167  while (!done)
2167    
2168        if (count >= 0)        if (count >= 0)
2169          {          {
2170          int i;          int i, maxcount;
2171    
2172    #if !defined NODFA
2173            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2174    #endif
2175              maxcount = use_size_offsets/3;
2176    
2177            /* This is a check against a lunatic return value. */
2178    
2179            if (count > maxcount)
2180              {
2181              fprintf(outfile,
2182                "** PCRE error: returned count %d is too big for offset size %d\n",
2183                count, use_size_offsets);
2184              count = use_size_offsets/3;
2185              if (do_g || do_G)
2186                {
2187                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2188                do_g = do_G = FALSE;        /* Break g/G loop */
2189                }
2190              }
2191    
2192          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2193            {            {
2194            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 1647  while (!done) Line 2216  while (!done)
2216            {            {
2217            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2218              {              {
2219              char copybuffer[16];              char copybuffer[256];
2220              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2221                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2222              if (rc < 0)              if (rc < 0)
# Line 1657  while (!done) Line 2226  while (!done)
2226              }              }
2227            }            }
2228    
2229            for (copynamesptr = copynames;
2230                 *copynamesptr != 0;
2231                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2232              {
2233              char copybuffer[256];
2234              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2235                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2236              if (rc < 0)
2237                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2238              else
2239                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2240              }
2241    
2242          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2243            {            {
2244            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1669  while (!done) Line 2251  while (!done)
2251              else              else
2252                {                {
2253                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2254                pcre_free_substring(substring);                pcre_free_substring(substring);
2255                }                }
2256              }              }
2257            }            }
2258    
2259            for (getnamesptr = getnames;
2260                 *getnamesptr != 0;
2261                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2262              {
2263              const char *substring;
2264              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2265                count, (char *)getnamesptr, &substring);
2266              if (rc < 0)
2267                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2268              else
2269                {
2270                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2271                pcre_free_substring(substring);
2272                }
2273              }
2274    
2275          if (getlist)          if (getlist)
2276            {            {
2277            const char **stringlist;            const char **stringlist;
# Line 1699  while (!done) Line 2296  while (!done)
2296        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
2297          {          {
2298          fprintf(outfile, "Partial match");          fprintf(outfile, "Partial match");
2299    #if !defined NODFA
2300          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2301            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2302              bptr + use_offsets[0]);              bptr + use_offsets[0]);
2303    #endif
2304          fprintf(outfile, "\n");          fprintf(outfile, "\n");
2305          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
2306          }          }
2307    
2308        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2309        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2310        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2311        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2312        offset values to achieve this. We won't be at the end of the string -  
2313        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any" or
2314          "anycrlf". If the previous match was at the end of a line terminated by
2315          CRLF, an advance of one character just passes the \r, whereas we should
2316          prefer the longer newline sequence, as does the code in pcre_exec().
2317          Fudge the offset value to achieve this.
2318    
2319          Otherwise, in the case of UTF-8 matching, the advance must be one
2320          character, not one byte. */
2321    
2322        else        else
2323          {          {
2324          if (g_notempty != 0)          if (g_notempty != 0)
2325            {            {
2326            int onechar = 1;            int onechar = 1;
2327              unsigned int obits = ((real_pcre *)re)->options;
2328            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2329            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2330                {
2331                int d;
2332                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2333                obits = (d == '\r')? PCRE_NEWLINE_CR :
2334                        (d == '\n')? PCRE_NEWLINE_LF :
2335                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2336                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2337                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2338                }
2339              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2340                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2341                  &&
2342                  start_offset < len - 1 &&
2343                  bptr[start_offset] == '\r' &&
2344                  bptr[start_offset+1] == '\n')
2345                onechar++;
2346              else if (use_utf8)
2347              {              {
2348              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2349                {                {
# Line 1754  while (!done) Line 2378  while (!done)
2378        character. */        character. */
2379    
2380        g_notempty = 0;        g_notempty = 0;
2381    
2382        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2383          {          {
2384          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
# Line 1772  while (!done) Line 2397  while (!done)
2397          len -= use_offsets[1];          len -= use_offsets[1];
2398          }          }
2399        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2400    
2401        NEXT_DATA: continue;
2402      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2403    
2404    CONTINUE:    CONTINUE:
# Line 1786  while (!done) Line 2413  while (!done)
2413      {      {
2414      new_free((void *)tables);      new_free((void *)tables);
2415      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2416        locale_set = 0;
2417      }      }
2418    }    }
2419    

Legend:
Removed from v.77  
changed lines
  Added in v.287

  ViewVC Help
Powered by ViewVC 1.1.5