/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 67 by nigel, Sat Feb 24 21:40:13 2007 UTC | revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather untidy in places. */  been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39  #include <ctype.h>  #include <ctype.h>
40  #include <stdio.h>  #include <stdio.h>
# Line 12  been extended and consequently is now ra Line 42  been extended and consequently is now ra
42  #include <stdlib.h>  #include <stdlib.h>
43  #include <time.h>  #include <time.h>
44  #include <locale.h>  #include <locale.h>
45    #include <errno.h>
46    
47  /* We need the internal info for displaying the results of pcre_study(). Also  #ifndef _WIN32
48  for getting the opcodes for showing compiled code. */  #include <sys/resource.h>
49    #endif
50    
51  #define PCRE_SPY        /* For Win32 build, import data, not export */  #define PCRE_SPY        /* For Win32 build, import data, not export */
52  #include "internal.h"  
53    /* We include pcre_internal.h because we need the internal info for displaying
54    the results of pcre_study() and we also need to know about the internal
55    macros, structures, and other internal data values; pcretest has "inside
56    information" compared to a program that strictly follows the PCRE API. */
57    
58    #include "pcre_internal.h"
59    
60    /* We need access to the data tables that PCRE uses. So as not to have to keep
61    two copies, we include the source file here, changing the names of the external
62    symbols to prevent clashes. */
63    
64    #define _pcre_utf8_table1      utf8_table1
65    #define _pcre_utf8_table1_size utf8_table1_size
66    #define _pcre_utf8_table2      utf8_table2
67    #define _pcre_utf8_table3      utf8_table3
68    #define _pcre_utf8_table4      utf8_table4
69    #define _pcre_utt              utt
70    #define _pcre_utt_size         utt_size
71    #define _pcre_OP_lengths       OP_lengths
72    
73    #include "pcre_tables.c"
74    
75    /* We also need the pcre_printint() function for printing out compiled
76    patterns. This function is in a separate file so that it can be included in
77    pcre_compile.c when that module is compiled with debugging enabled. */
78    
79    #include "pcre_printint.src"
80    
81    
82  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
83  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 27  Makefile. */ Line 87  Makefile. */
87  #include "pcreposix.h"  #include "pcreposix.h"
88  #endif  #endif
89    
90    /* It is also possible, for the benefit of the version imported into Exim, to
91    build pcretest without support for UTF8 (define NOUTF8), without the interface
92    to the DFA matcher (NODFA), and without the doublecheck of the old "info"
93    function (define NOINFOCHECK). */
94    
95    
96    /* Other parameters */
97    
98  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
99  #ifdef CLK_TCK  #ifdef CLK_TCK
100  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 35  Makefile. */ Line 103  Makefile. */
103  #endif  #endif
104  #endif  #endif
105    
106  #define LOOPREPEAT 50000  #define LOOPREPEAT 500000
107    
108    /* Static variables */
109    
110  static FILE *outfile;  static FILE *outfile;
111  static int log_store = 0;  static int log_store = 0;
# Line 45  static int callout_extra; Line 114  static int callout_extra;
114  static int callout_fail_count;  static int callout_fail_count;
115  static int callout_fail_id;  static int callout_fail_id;
116  static int first_callout;  static int first_callout;
117    static int show_malloc;
118  static int use_utf8;  static int use_utf8;
119  static size_t gotten_store;  static size_t gotten_store;
120    
121    /* The buffers grow automatically if very long input lines are encountered. */
122    
123    static int buffer_size = 50000;
124    static uschar *buffer = NULL;
125    static uschar *dbuffer = NULL;
126    static uschar *pbuffer = NULL;
127    
 static int utf8_table1[] = {  
   0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  
128    
 static int utf8_table2[] = {  
   0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  
129    
130  static int utf8_table3[] = {  /*************************************************
131    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  *        Read or extend an input line            *
132    *************************************************/
133    
134    /* Input lines are read into buffer, but both patterns and data lines can be
135    continued over multiple input lines. In addition, if the buffer fills up, we
136    want to automatically expand it so as to be able to handle extremely large
137    lines that are needed for certain stress tests. When the input buffer is
138    expanded, the other two buffers must also be expanded likewise, and the
139    contents of pbuffer, which are a copy of the input for callouts, must be
140    preserved (for when expansion happens for a data line). This is not the most
141    optimal way of handling this, but hey, this is just a test program!
142    
143    Arguments:
144      f            the file to read
145      start        where in buffer to start (this *must* be within buffer)
146    
147    Returns:       pointer to the start of new data
148                   could be a copy of start, or could be moved
149                   NULL if no data read and EOF reached
150    */
151    
152    static uschar *
153    extend_inputline(FILE *f, uschar *start)
154    {
155    uschar *here = start;
156    
157    for (;;)
158      {
159      int rlen = buffer_size - (here - buffer);
160      if (rlen > 1000)
161        {
162        int dlen;
163        if (fgets((char *)here, rlen,  f) == NULL)
164          return (here == start)? NULL : start;
165        dlen = (int)strlen((char *)here);
166        if (dlen > 0 && here[dlen - 1] == '\n') return start;
167        here += dlen;
168        }
169    
170      else
171        {
172        int new_buffer_size = 2*buffer_size;
173        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
174        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
175        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
176    
177        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
178          {
179          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
180          exit(1);
181          }
182    
183        memcpy(new_buffer, buffer, buffer_size);
184        memcpy(new_pbuffer, pbuffer, buffer_size);
185    
186        buffer_size = new_buffer_size;
187    
188        start = new_buffer + (start - buffer);
189        here = new_buffer + (here - buffer);
190    
191        free(buffer);
192        free(dbuffer);
193        free(pbuffer);
194    
195        buffer = new_buffer;
196        dbuffer = new_dbuffer;
197        pbuffer = new_pbuffer;
198        }
199      }
200    
201    return NULL;  /* Control never gets here */
202    }
203    
 /*************************************************  
 *         Print compiled regex                   *  
 *************************************************/  
204    
 /* The code for doing this is held in a separate file that is also included in  
 pcre.c when it is compiled with the debug switch. It defines a function called  
 print_internals(), which uses a table of opcode lengths defined by the macro  
 OP_LENGTHS, whose name must be OP_lengths. */  
205    
 static uschar OP_lengths[] = { OP_LENGTHS };  
206    
 #include "printint.c"  
207    
208    
209    
# Line 103  return(result); Line 234  return(result);
234    
235    
236    
 /*************************************************  
 *       Convert character value to UTF-8         *  
 *************************************************/  
   
 /* This function takes an integer value in the range 0 - 0x7fffffff  
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   buffer     pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
 */  
   
 static int  
 ord2utf8(int cvalue, unsigned char *buffer)  
 {  
 register int i, j;  
 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  
   if (cvalue <= utf8_table1[i]) break;  
 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  
 if (cvalue < 0) return -1;  
   
 buffer += i;  
 for (j = i; j > 0; j--)  
  {  
  *buffer-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *buffer = utf8_table2[i] | cvalue;  
 return i + 1;  
 }  
   
237    
238  /*************************************************  /*************************************************
239  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
# Line 148  return i + 1; Line 243  return i + 1;
243  and returns the value of the character.  and returns the value of the character.
244    
245  Argument:  Argument:
246    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
247    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
248    
249  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
250             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
251  */  */
252    
253    #if !defined NOUTF8
254    
255  static int  static int
256  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *utf8bytes, int *vptr)
257  {  {
258  int c = *buffer++;  int c = *utf8bytes++;
259  int d = c;  int d = c;
260  int i, j, s;  int i, j, s;
261    
# Line 178  d = (c & utf8_table3[i]) << s; Line 275  d = (c & utf8_table3[i]) << s;
275    
276  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
277    {    {
278    c = *buffer++;    c = *utf8bytes++;
279    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
280    s -= 6;    s -= 6;
281    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 186  for (j = 0; j < i; j++) Line 283  for (j = 0; j < i; j++)
283    
284  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
285    
286  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < utf8_table1_size; j++)
287    if (d <= utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
288  if (j != i) return -(i+1);  if (j != i) return -(i+1);
289    
# Line 196  if (j != i) return -(i+1); Line 293  if (j != i) return -(i+1);
293  return i+1;  return i+1;
294  }  }
295    
296    #endif
297    
298    
299    
300    /*************************************************
301    *       Convert character value to UTF-8         *
302    *************************************************/
303    
304    /* This function takes an integer value in the range 0 - 0x7fffffff
305    and encodes it as a UTF-8 character in 0 to 6 bytes.
306    
307    Arguments:
308      cvalue     the character value
309      utf8bytes  pointer to buffer for result - at least 6 bytes long
310    
311    Returns:     number of characters placed in the buffer
312    */
313    
314    static int
315    ord2utf8(int cvalue, uschar *utf8bytes)
316    {
317    register int i, j;
318    for (i = 0; i < utf8_table1_size; i++)
319      if (cvalue <= utf8_table1[i]) break;
320    utf8bytes += i;
321    for (j = i; j > 0; j--)
322     {
323     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
324     cvalue >>= 6;
325     }
326    *utf8bytes = utf8_table2[i] | cvalue;
327    return i + 1;
328    }
329    
330    
331    
332  /*************************************************  /*************************************************
# Line 208  chars without printing. */ Line 339  chars without printing. */
339    
340  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(unsigned char *p, int length, FILE *f)
341  {  {
342  int c;  int c = 0;
343  int yield = 0;  int yield = 0;
344    
345  while (length-- > 0)  while (length-- > 0)
346    {    {
347    #if !defined NOUTF8
348    if (use_utf8)    if (use_utf8)
349      {      {
350      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
# Line 235  while (length-- > 0) Line 367  while (length-- > 0)
367        continue;        continue;
368        }        }
369      }      }
370    #endif
371    
372     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
373    
# Line 266  data is not zero. */ Line 399  data is not zero. */
399  static int callout(pcre_callout_block *cb)  static int callout(pcre_callout_block *cb)
400  {  {
401  FILE *f = (first_callout | callout_extra)? outfile : NULL;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
402  int i, pre_start, post_start;  int i, pre_start, post_start, subject_length;
403    
404  if (callout_extra)  if (callout_extra)
405    {    {
# Line 297  pre_start = pchars((unsigned char *)cb-> Line 430  pre_start = pchars((unsigned char *)cb->
430  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
431    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
432    
433    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
434    
435  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  (void)pchars((unsigned char *)(cb->subject + cb->current_position),
436    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
437    
438  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
439    
440  /* Always print appropriate indicators, with callout number if not already  /* Always print appropriate indicators, with callout number if not already
441  shown */  shown. For automatic callouts, show the pattern offset. */
442    
443  if (callout_extra) fprintf(outfile, "    ");  if (cb->callout_number == 255)
444    else fprintf(outfile, "%3d ", cb->callout_number);    {
445      fprintf(outfile, "%+3d ", cb->pattern_position);
446      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
447      }
448    else
449      {
450      if (callout_extra) fprintf(outfile, "    ");
451        else fprintf(outfile, "%3d ", cb->callout_number);
452      }
453    
454  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
455  fprintf(outfile, "^");  fprintf(outfile, "^");
# Line 317  if (post_start > 0) Line 460  if (post_start > 0)
460    fprintf(outfile, "^");    fprintf(outfile, "^");
461    }    }
462    
463  fprintf(outfile, "\n");  for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
464      fprintf(outfile, " ");
465    
466    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
467      pbuffer + cb->pattern_position);
468    
469    fprintf(outfile, "\n");
470  first_callout = 0;  first_callout = 0;
471    
472  if ((int)(cb->callout_data) != 0)  if (cb->callout_data != NULL)
473    {    {
474    fprintf(outfile, "Callout data = %d\n", (int)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
475    return (int)(cb->callout_data);    if (callout_data != 0)
476        {
477        fprintf(outfile, "Callout data = %d\n", callout_data);
478        return callout_data;
479        }
480    }    }
481    
482  return (cb->callout_number != callout_fail_id)? 0 :  return (cb->callout_number != callout_fail_id)? 0 :
# Line 333  return (cb->callout_number != callout_fa Line 485  return (cb->callout_number != callout_fa
485    
486    
487  /*************************************************  /*************************************************
488  *            Local malloc function               *  *            Local malloc functions              *
489  *************************************************/  *************************************************/
490    
491  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
# Line 341  compiled re. */ Line 493  compiled re. */
493    
494  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
495  {  {
496    void *block = malloc(size);
497  gotten_store = size;  gotten_store = size;
498  return malloc(size);  if (show_malloc)
499      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
500    return block;
501    }
502    
503    static void new_free(void *block)
504    {
505    if (show_malloc)
506      fprintf(outfile, "free             %p\n", block);
507    free(block);
508    }
509    
510    
511    /* For recursion malloc/free, to test stacking calls */
512    
513    static void *stack_malloc(size_t size)
514    {
515    void *block = malloc(size);
516    if (show_malloc)
517      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
518    return block;
519  }  }
520    
521    static void stack_free(void *block)
522    {
523    if (show_malloc)
524      fprintf(outfile, "stack_free       %p\n", block);
525    free(block);
526    }
527    
528    
529  /*************************************************  /*************************************************
# Line 363  if ((rc = pcre_fullinfo(re, study, optio Line 542  if ((rc = pcre_fullinfo(re, study, optio
542    
543    
544  /*************************************************  /*************************************************
545    *         Byte flipping function                 *
546    *************************************************/
547    
548    static unsigned long int
549    byteflip(unsigned long int value, int n)
550    {
551    if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
552    return ((value & 0x000000ff) << 24) |
553           ((value & 0x0000ff00) <<  8) |
554           ((value & 0x00ff0000) >>  8) |
555           ((value & 0xff000000) >> 24);
556    }
557    
558    
559    
560    
561    /*************************************************
562    *        Check match or recursion limit          *
563    *************************************************/
564    
565    static int
566    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
567      int start_offset, int options, int *use_offsets, int use_size_offsets,
568      int flag, unsigned long int *limit, int errnumber, const char *msg)
569    {
570    int count;
571    int min = 0;
572    int mid = 64;
573    int max = -1;
574    
575    extra->flags |= flag;
576    
577    for (;;)
578      {
579      *limit = mid;
580    
581      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
582        use_offsets, use_size_offsets);
583    
584      if (count == errnumber)
585        {
586        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
587        min = mid;
588        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
589        }
590    
591      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
592                             count == PCRE_ERROR_PARTIAL)
593        {
594        if (mid == min + 1)
595          {
596          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
597          break;
598          }
599        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
600        max = mid;
601        mid = (min + mid)/2;
602        }
603      else break;    /* Some other error */
604      }
605    
606    extra->flags &= ~flag;
607    return count;
608    }
609    
610    
611    
612    /*************************************************
613    *         Check newline indicator                *
614    *************************************************/
615    
616    /* This is used both at compile and run-time to check for <xxx> escapes, where
617    xxx is LF, CR, or CRLF. Print a message and return 0 if there is no match.
618    
619    Arguments:
620      p           points after the leading '<'
621      f           file for error message
622    
623    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
624    */
625    
626    static int
627    check_newline(uschar *p, FILE *f)
628    {
629    if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
630    if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
631    if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
632    fprintf(f, "Unknown newline type at: <%s\n", p);
633    return 0;
634    }
635    
636    
637    
638    /*************************************************
639  *                Main Program                    *  *                Main Program                    *
640  *************************************************/  *************************************************/
641    
# Line 379  int op = 1; Line 652  int op = 1;
652  int timeit = 0;  int timeit = 0;
653  int showinfo = 0;  int showinfo = 0;
654  int showstore = 0;  int showstore = 0;
655    int quiet = 0;
656  int size_offsets = 45;  int size_offsets = 45;
657  int size_offsets_max;  int size_offsets_max;
658  int *offsets;  int *offsets = NULL;
659  #if !defined NOPOSIX  #if !defined NOPOSIX
660  int posix = 0;  int posix = 0;
661  #endif  #endif
662  int debug = 0;  int debug = 0;
663  int done = 0;  int done = 0;
664  unsigned char buffer[30000];  int all_use_dfa = 0;
665  unsigned char dbuffer[1024];  int yield = 0;
666    int stack_size;
667    
668    /* These vectors store, end-to-end, a list of captured substring names. Assume
669    that 1024 is plenty long enough for the few names we'll be testing. */
670    
671    uschar copynames[1024];
672    uschar getnames[1024];
673    
674  /* Static so that new_malloc can use it. */  uschar *copynamesptr;
675    uschar *getnamesptr;
676    
677    /* Get buffers from malloc() so that Electric Fence will check their misuse
678    when I am debugging. They grow automatically when very long lines are read. */
679    
680    buffer = (unsigned char *)malloc(buffer_size);
681    dbuffer = (unsigned char *)malloc(buffer_size);
682    pbuffer = (unsigned char *)malloc(buffer_size);
683    
684    /* The outfile variable is static so that new_malloc can use it. The _setmode()
685    stuff is some magic that I don't understand, but which apparently does good
686    things in Windows. It's related to line terminations.  */
687    
688    #if defined(_WIN32) || defined(WIN32)
689    _setmode( _fileno( stdout ), 0x8000 );
690    #endif  /* defined(_WIN32) || defined(WIN32) */
691    
692  outfile = stdout;  outfile = stdout;
693    
# Line 403  while (argc > 1 && argv[op][0] == '-') Line 700  while (argc > 1 && argv[op][0] == '-')
700    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
701      showstore = 1;      showstore = 1;
702    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
703      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
704    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
705    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
706    #if !defined NODFA
707      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
708    #endif
709    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
710        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
711          *endptr == 0))          *endptr == 0))
# Line 412  while (argc > 1 && argv[op][0] == '-') Line 713  while (argc > 1 && argv[op][0] == '-')
713      op++;      op++;
714      argc--;      argc--;
715      }      }
716      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
717          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
718            *endptr == 0))
719        {
720    #ifdef _WIN32
721        printf("PCRE: -S not supported on this OS\n");
722        exit(1);
723    #else
724        int rc;
725        struct rlimit rlim;
726        getrlimit(RLIMIT_STACK, &rlim);
727        rlim.rlim_cur = stack_size * 1024 * 1024;
728        rc = setrlimit(RLIMIT_STACK, &rlim);
729        if (rc != 0)
730          {
731        printf("PCRE: setrlimit() failed with error %d\n", rc);
732        exit(1);
733          }
734        op++;
735        argc--;
736    #endif
737        }
738  #if !defined NOPOSIX  #if !defined NOPOSIX
739    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
740  #endif  #endif
# Line 422  while (argc > 1 && argv[op][0] == '-') Line 745  while (argc > 1 && argv[op][0] == '-')
745      printf("Compiled with\n");      printf("Compiled with\n");
746      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
747      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
748        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
749        printf("  %sUnicode properties support\n", rc? "" : "No ");
750      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
751      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
752          (rc == '\n')? "LF" : "CRLF");
753      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
754      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
755      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
756      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
757      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
758      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %d\n", rc);
759        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
760        printf("  Default recursion depth limit = %d\n", rc);
761        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
762        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
763      exit(0);      exit(0);
764      }      }
765    else    else
766      {      {
767      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
768      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [options] [<input> [<output>]]\n");
769      printf("  -C     show PCRE compile-time options and exit\n");      printf("  -C     show PCRE compile-time options and exit\n");
770      printf("  -d     debug: show compiled code; implies -i\n"      printf("  -d     debug: show compiled code; implies -i\n");
771             "  -i     show information about compiled pattern\n"  #if !defined NODFA
772        printf("  -dfa   force DFA matching for all subjects\n");
773    #endif
774        printf("  -i     show information about compiled pattern\n"
775               "  -m     output memory used information\n"
776             "  -o <n> set size of offsets vector to <n>\n");             "  -o <n> set size of offsets vector to <n>\n");
777  #if !defined NOPOSIX  #if !defined NOPOSIX
778      printf("  -p     use POSIX interface\n");      printf("  -p     use POSIX interface\n");
779  #endif  #endif
780      printf("  -s     output store information\n"      printf("  -S <n> set stack size to <n> megabytes\n");
781        printf("  -s     output store (memory) used information\n"
782             "  -t     time compilation and execution\n");             "  -t     time compilation and execution\n");
783      return 1;      yield = 1;
784        goto EXIT;
785      }      }
786    op++;    op++;
787    argc--;    argc--;
# Line 454  while (argc > 1 && argv[op][0] == '-') Line 790  while (argc > 1 && argv[op][0] == '-')
790  /* Get the store for the offsets vector, and remember what it was */  /* Get the store for the offsets vector, and remember what it was */
791    
792  size_offsets_max = size_offsets;  size_offsets_max = size_offsets;
793  offsets = malloc(size_offsets_max * sizeof(int));  offsets = (int *)malloc(size_offsets_max * sizeof(int));
794  if (offsets == NULL)  if (offsets == NULL)
795    {    {
796    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
797      size_offsets_max * sizeof(int));      size_offsets_max * sizeof(int));
798    return 1;    yield = 1;
799      goto EXIT;
800    }    }
801    
802  /* Sort out the input and output files */  /* Sort out the input and output files */
803    
804  if (argc > 1)  if (argc > 1)
805    {    {
806    infile = fopen(argv[op], "r");    infile = fopen(argv[op], "rb");
807    if (infile == NULL)    if (infile == NULL)
808      {      {
809      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
810      return 1;      yield = 1;
811        goto EXIT;
812      }      }
813    }    }
814    
815  if (argc > 2)  if (argc > 2)
816    {    {
817    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], "wb");
818    if (outfile == NULL)    if (outfile == NULL)
819      {      {
820      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
821      return 1;      yield = 1;
822        goto EXIT;
823      }      }
824    }    }
825    
826  /* Set alternative malloc function */  /* Set alternative malloc function */
827    
828  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
829    pcre_free = new_free;
830    pcre_stack_malloc = stack_malloc;
831    pcre_stack_free = stack_free;
832    
833  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
834    
835  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
836    
837  /* Main loop */  /* Main loop */
838    
# Line 506  while (!done) Line 848  while (!done)
848    
849    const char *error;    const char *error;
850    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
851      unsigned char *to_file = NULL;
852    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
853      unsigned long int true_size, true_study_size = 0;
854      size_t size, regex_gotten_store;
855    int do_study = 0;    int do_study = 0;
856    int do_debug = debug;    int do_debug = debug;
857    int do_G = 0;    int do_G = 0;
858    int do_g = 0;    int do_g = 0;
859    int do_showinfo = showinfo;    int do_showinfo = showinfo;
860    int do_showrest = 0;    int do_showrest = 0;
861      int do_flip = 0;
862    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
863    
864    use_utf8 = 0;    use_utf8 = 0;
865    
866    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
867    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
868    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
869    fflush(outfile);    fflush(outfile);
870    
# Line 526  while (!done) Line 872  while (!done)
872    while (isspace(*p)) p++;    while (isspace(*p)) p++;
873    if (*p == 0) continue;    if (*p == 0) continue;
874    
875    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
876    complete, read more. */  
877      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
878        {
879        unsigned long int magic, get_options;
880        uschar sbuf[8];
881        FILE *f;
882    
883        p++;
884        pp = p + (int)strlen((char *)p);
885        while (isspace(pp[-1])) pp--;
886        *pp = 0;
887    
888        f = fopen((char *)p, "rb");
889        if (f == NULL)
890          {
891          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
892          continue;
893          }
894    
895        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
896    
897        true_size =
898          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
899        true_study_size =
900          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
901    
902        re = (real_pcre *)new_malloc(true_size);
903        regex_gotten_store = gotten_store;
904    
905        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
906    
907        magic = ((real_pcre *)re)->magic_number;
908        if (magic != MAGIC_NUMBER)
909          {
910          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
911            {
912            do_flip = 1;
913            }
914          else
915            {
916            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
917            fclose(f);
918            continue;
919            }
920          }
921    
922        fprintf(outfile, "Compiled regex%s loaded from %s\n",
923          do_flip? " (byte-inverted)" : "", p);
924    
925        /* Need to know if UTF-8 for printing data strings */
926    
927        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
928        use_utf8 = (get_options & PCRE_UTF8) != 0;
929    
930        /* Now see if there is any following study data */
931    
932        if (true_study_size != 0)
933          {
934          pcre_study_data *psd;
935    
936          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
937          extra->flags = PCRE_EXTRA_STUDY_DATA;
938    
939          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
940          extra->study_data = psd;
941    
942          if (fread(psd, 1, true_study_size, f) != true_study_size)
943            {
944            FAIL_READ:
945            fprintf(outfile, "Failed to read data from %s\n", p);
946            if (extra != NULL) new_free(extra);
947            if (re != NULL) new_free(re);
948            fclose(f);
949            continue;
950            }
951          fprintf(outfile, "Study data loaded from %s\n", p);
952          do_study = 1;     /* To get the data output if requested */
953          }
954        else fprintf(outfile, "No study data\n");
955    
956        fclose(f);
957        goto SHOW_INFO;
958        }
959    
960      /* In-line pattern (the usual case). Get the delimiter and seek the end of
961    the pattern; if it isn't complete, read more. */
962    
963    delimiter = *p++;    delimiter = *p++;
964    
# Line 548  while (!done) Line 979  while (!done)
979        pp++;        pp++;
980        }        }
981      if (*pp != 0) break;      if (*pp != 0) break;
   
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
982      if (infile == stdin) printf("    > ");      if (infile == stdin) printf("    > ");
983      if (fgets((char *)pp, len, infile) == NULL)      if ((pp = extend_inputline(infile, pp)) == NULL)
984        {        {
985        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
986        done = 1;        done = 1;
# Line 572  while (!done) Line 995  while (!done)
995    
996    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
997    
998    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
999      for callouts. */
1000    
1001    *pp++ = 0;    *pp++ = 0;
1002      strcpy((char *)pbuffer, (char *)p);
1003    
1004    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1005    
# Line 586  while (!done) Line 1011  while (!done)
1011      {      {
1012      switch (*pp++)      switch (*pp++)
1013        {        {
1014          case 'f': options |= PCRE_FIRSTLINE; break;
1015        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
1016        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1017        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 594  while (!done) Line 1020  while (!done)
1020    
1021        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1022        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1023          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1024        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1025        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1026          case 'F': do_flip = 1; break;
1027        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1028        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1029          case 'J': options |= PCRE_DUPNAMES; break;
1030        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1031        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1032    
# Line 609  while (!done) Line 1038  while (!done)
1038        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1039        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1040        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1041          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1042    
1043        case 'L':        case 'L':
1044        ppp = pp;        ppp = pp;
1045        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows */
1046          while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1047        *ppp = 0;        *ppp = 0;
1048        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1049          {          {
# Line 623  while (!done) Line 1054  while (!done)
1054        pp = ppp;        pp = ppp;
1055        break;        break;
1056    
1057        case '\n': case ' ': break;        case '>':
1058          to_file = pp;
1059          while (*pp != 0) pp++;
1060          while (isspace(pp[-1])) pp--;
1061          *pp = 0;
1062          break;
1063    
1064          case '<':
1065            {
1066            int x = check_newline(pp, outfile);
1067            if (x == 0) goto SKIP_DATA;
1068            options |= x;
1069            while (*pp++ != '>');
1070            }
1071          break;
1072    
1073          case '\r':                      /* So that it works in Windows */
1074          case '\n':
1075          case ' ':
1076          break;
1077    
1078        default:        default:
1079        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1080        goto SKIP_DATA;        goto SKIP_DATA;
# Line 639  while (!done) Line 1090  while (!done)
1090      {      {
1091      int rc;      int rc;
1092      int cflags = 0;      int cflags = 0;
1093    
1094      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1095      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1096        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1097        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1098        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1099    
1100      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1101    
1102      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 648  while (!done) Line 1104  while (!done)
1104    
1105      if (rc != 0)      if (rc != 0)
1106        {        {
1107        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1108        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1109        goto SKIP_DATA;        goto SKIP_DATA;
1110        }        }
# Line 689  while (!done) Line 1145  while (!done)
1145          {          {
1146          for (;;)          for (;;)
1147            {            {
1148            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer) == NULL)
1149              {              {
1150              done = 1;              done = 1;
1151              goto CONTINUE;              goto CONTINUE;
# Line 713  while (!done) Line 1169  while (!done)
1169                sizeof(real_pcre) -                sizeof(real_pcre) -
1170                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1171    
1172        /* Extract the size for possible writing before possibly flipping it,
1173        and remember the store that was got. */
1174    
1175        true_size = ((real_pcre *)re)->size;
1176        regex_gotten_store = gotten_store;
1177    
1178        /* If /S was present, study the regexp to generate additional info to
1179        help with the matching. */
1180    
1181        if (do_study)
1182          {
1183          if (timeit)
1184            {
1185            register int i;
1186            clock_t time_taken;
1187            clock_t start_time = clock();
1188            for (i = 0; i < LOOPREPEAT; i++)
1189              extra = pcre_study(re, study_options, &error);
1190            time_taken = clock() - start_time;
1191            if (extra != NULL) free(extra);
1192            fprintf(outfile, "  Study time %.3f milliseconds\n",
1193              (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1194                (double)CLOCKS_PER_SEC);
1195            }
1196          extra = pcre_study(re, study_options, &error);
1197          if (error != NULL)
1198            fprintf(outfile, "Failed to study: %s\n", error);
1199          else if (extra != NULL)
1200            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1201          }
1202    
1203        /* If the 'F' option was present, we flip the bytes of all the integer
1204        fields in the regex data block and the study block. This is to make it
1205        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1206        compiled on a different architecture. */
1207    
1208        if (do_flip)
1209          {
1210          real_pcre *rre = (real_pcre *)re;
1211          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1212          rre->size = byteflip(rre->size, sizeof(rre->size));
1213          rre->options = byteflip(rre->options, sizeof(rre->options));
1214          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1215          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1216          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1217          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1218          rre->name_table_offset = byteflip(rre->name_table_offset,
1219            sizeof(rre->name_table_offset));
1220          rre->name_entry_size = byteflip(rre->name_entry_size,
1221            sizeof(rre->name_entry_size));
1222          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1223    
1224          if (extra != NULL)
1225            {
1226            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1227            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1228            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1229            }
1230          }
1231    
1232        /* Extract information from the compiled data if required */
1233    
1234        SHOW_INFO:
1235    
1236      if (do_showinfo)      if (do_showinfo)
1237        {        {
1238        unsigned long int get_options;        unsigned long int get_options, all_options;
1239    #if !defined NOINFOCHECK
1240        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1241    #endif
1242        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char;
1243        int nameentrysize, namecount;        int nameentrysize, namecount;
1244        const uschar *nametable;        const uschar *nametable;
       size_t size;  
1245    
1246        if (do_debug)        if (do_debug)
1247          {          {
1248          fprintf(outfile, "------------------------------------------------------------------\n");          fprintf(outfile, "------------------------------------------------------------------\n");
1249          print_internals(re, outfile);          pcre_printint(re, outfile);
1250          }          }
1251    
1252        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
# Line 738  while (!done) Line 1259  while (!done)
1259        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1260        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1261    
1262    #if !defined NOINFOCHECK
1263        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1264        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1265          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 755  while (!done) Line 1277  while (!done)
1277            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1278              get_options, old_options);              get_options, old_options);
1279          }          }
1280    #endif
1281    
1282        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1283          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1284          size, gotten_store);          (int)size, (int)regex_gotten_store);
1285    
1286        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1287        if (backrefmax > 0)        if (backrefmax > 0)
# Line 776  while (!done) Line 1299  while (!done)
1299            }            }
1300          }          }
1301    
1302          /* The NOPARTIAL bit is a private bit in the options, so we have
1303          to fish it out via our back door */
1304    
1305          all_options = ((real_pcre *)re)->options;
1306          if (do_flip)
1307            {
1308            all_options = byteflip(all_options, sizeof(all_options));
1309             }
1310    
1311          if ((all_options & PCRE_NOPARTIAL) != 0)
1312            fprintf(outfile, "Partial matching not supported\n");
1313    
1314        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1315          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1316            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1317            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1318            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1319            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1320              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1321            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1322            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1323            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1324            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1325            ((get_options & PCRE_UTF8) != 0)? " utf8" : "");            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1326              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1327              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1328              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1329    
1330          switch (get_options & PCRE_NEWLINE_CRLF)
1331            {
1332            case PCRE_NEWLINE_CR:
1333            fprintf(outfile, "Forced newline sequence: CR\n");
1334            break;
1335    
1336        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)          case PCRE_NEWLINE_LF:
1337          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Forced newline sequence: LF\n");
1338            break;
1339    
1340            case PCRE_NEWLINE_CRLF:
1341            fprintf(outfile, "Forced newline sequence: CRLF\n");
1342            break;
1343    
1344            default:
1345            break;
1346            }
1347    
1348        if (first_char == -1)        if (first_char == -1)
1349          {          {
1350          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1351          }          }
1352        else if (first_char < 0)        else if (first_char < 0)
1353          {          {
# Line 824  while (!done) Line 1378  while (!done)
1378          else          else
1379            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1380          }          }
       }  
1381    
1382      /* If /S was present, study the regexp to generate additional info to        /* Don't output study size; at present it is in any case a fixed
1383      help with the matching. */        value, but it varies, depending on the computer architecture, and
1384          so messes up the test suite. (And with the /F option, it might be
1385          flipped.) */
1386    
1387      if (do_study)        if (do_study)
       {  
       if (timeit)  
1388          {          {
1389          register int i;          if (extra == NULL)
1390          clock_t time_taken;            fprintf(outfile, "Study returned NULL\n");
1391          clock_t start_time = clock();          else
1392          for (i = 0; i < LOOPREPEAT; i++)            {
1393            extra = pcre_study(re, study_options, &error);            uschar *start_bits = NULL;
1394          time_taken = clock() - start_time;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1395          if (extra != NULL) free(extra);  
1396          fprintf(outfile, "  Study time %.3f milliseconds\n",            if (start_bits == NULL)
1397            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /              fprintf(outfile, "No starting byte set\n");
1398              (double)CLOCKS_PER_SEC);            else
1399                {
1400                int i;
1401                int c = 24;
1402                fprintf(outfile, "Starting byte set: ");
1403                for (i = 0; i < 256; i++)
1404                  {
1405                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1406                    {
1407                    if (c > 75)
1408                      {
1409                      fprintf(outfile, "\n  ");
1410                      c = 2;
1411                      }
1412                    if (isprint(i) && i != ' ')
1413                      {
1414                      fprintf(outfile, "%c ", i);
1415                      c += 2;
1416                      }
1417                    else
1418                      {
1419                      fprintf(outfile, "\\x%02x ", i);
1420                      c += 5;
1421                      }
1422                    }
1423                  }
1424                fprintf(outfile, "\n");
1425                }
1426              }
1427          }          }
1428          }
1429    
1430        extra = pcre_study(re, study_options, &error);      /* If the '>' option was present, we write out the regex to a file, and
1431        if (error != NULL)      that is all. The first 8 bytes of the file are the regex length and then
1432          fprintf(outfile, "Failed to study: %s\n", error);      the study length, in big-endian order. */
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
1433    
1434        else if (do_showinfo)      if (to_file != NULL)
1435          {
1436          FILE *f = fopen((char *)to_file, "wb");
1437          if (f == NULL)
1438          {          {
1439          size_t size;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1440          uschar *start_bits = NULL;          }
1441          new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);        else
1442          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          {
1443          fprintf(outfile, "Study size = %d\n", size);          uschar sbuf[8];
1444          if (start_bits == NULL)          sbuf[0] = (true_size >> 24)  & 255;
1445            fprintf(outfile, "No starting character set\n");          sbuf[1] = (true_size >> 16)  & 255;
1446            sbuf[2] = (true_size >>  8)  & 255;
1447            sbuf[3] = (true_size)  & 255;
1448    
1449            sbuf[4] = (true_study_size >> 24)  & 255;
1450            sbuf[5] = (true_study_size >> 16)  & 255;
1451            sbuf[6] = (true_study_size >>  8)  & 255;
1452            sbuf[7] = (true_study_size)  & 255;
1453    
1454            if (fwrite(sbuf, 1, 8, f) < 8 ||
1455                fwrite(re, 1, true_size, f) < true_size)
1456              {
1457              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1458              }
1459          else          else
1460            {            {
1461            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1462            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1463              {              {
1464              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1465                    true_study_size)
1466                {                {
1467                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1468                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1469                }                }
1470                else fprintf(outfile, "Study data written to %s\n", to_file);
1471              }              }
           fprintf(outfile, "\n");  
1472            }            }
1473            fclose(f);
1474          }          }
1475    
1476          new_free(re);
1477          if (extra != NULL) new_free(extra);
1478          if (tables != NULL) new_free((void *)tables);
1479          continue;  /* With next regex */
1480        }        }
1481      }      }        /* End of non-POSIX compile */
1482    
1483    /* Read data lines and test them */    /* Read data lines and test them */
1484    
1485    for (;;)    for (;;)
1486      {      {
1487      unsigned char *q;      uschar *q;
1488      unsigned char *bptr = dbuffer;      uschar *bptr = dbuffer;
1489      int *use_offsets = offsets;      int *use_offsets = offsets;
1490      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1491      int callout_data = 0;      int callout_data = 0;
# Line 910  while (!done) Line 1498  while (!done)
1498      int gmatched = 0;      int gmatched = 0;
1499      int start_offset = 0;      int start_offset = 0;
1500      int g_notempty = 0;      int g_notempty = 0;
1501        int use_dfa = 0;
1502    
1503      options = 0;      options = 0;
1504    
1505        *copynames = 0;
1506        *getnames = 0;
1507    
1508        copynamesptr = copynames;
1509        getnamesptr = getnames;
1510    
1511      pcre_callout = callout;      pcre_callout = callout;
1512      first_callout = 1;      first_callout = 1;
1513      callout_extra = 0;      callout_extra = 0;
1514      callout_count = 0;      callout_count = 0;
1515      callout_fail_count = 999999;      callout_fail_count = 999999;
1516      callout_fail_id = -1;      callout_fail_id = -1;
1517        show_malloc = 0;
1518    
1519        if (extra != NULL) extra->flags &=
1520          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1521    
1522      if (infile == stdin) printf("data> ");      len = 0;
1523      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      for (;;)
1524        {        {
1525        done = 1;        if (infile == stdin) printf("data> ");
1526        goto CONTINUE;        if (extend_inputline(infile, buffer + len) == NULL)
1527            {
1528            if (len > 0) break;
1529            done = 1;
1530            goto CONTINUE;
1531            }
1532          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1533          len = (int)strlen((char *)buffer);
1534          if (buffer[len-1] == '\n') break;
1535        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1536    
     len = (int)strlen((char *)buffer);  
1537      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1538      buffer[len] = 0;      buffer[len] = 0;
1539      if (len == 0) break;      if (len == 0) break;
# Line 958  while (!done) Line 1563  while (!done)
1563          c -= '0';          c -= '0';
1564          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1565            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1566    
1567    #if !defined NOUTF8
1568            if (use_utf8 && c > 255)
1569              {
1570              unsigned char buff8[8];
1571              int ii, utn;
1572              utn = ord2utf8(c, buff8);
1573              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1574              c = buff8[ii];   /* Last byte */
1575              }
1576    #endif
1577          break;          break;
1578    
1579          case 'x':          case 'x':
1580    
1581          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
1582    
1583    #if !defined NOUTF8
1584          if (*p == '{')          if (*p == '{')
1585            {            {
1586            unsigned char *pt = p;            unsigned char *pt = p;
# Line 982  while (!done) Line 1599  while (!done)
1599              }              }
1600            /* Not correct form; fall through */            /* Not correct form; fall through */
1601            }            }
1602    #endif
1603    
1604          /* Ordinary \x */          /* Ordinary \x */
1605    
# Line 993  while (!done) Line 1611  while (!done)
1611            }            }
1612          break;          break;
1613    
1614          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1615          p--;          p--;
1616          continue;          continue;
1617    
1618            case '>':
1619            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1620            continue;
1621    
1622          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1623          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1624          continue;          continue;
# Line 1013  while (!done) Line 1635  while (!done)
1635            }            }
1636          else if (isalnum(*p))          else if (isalnum(*p))
1637            {            {
1638            uschar name[256];            uschar *npp = copynamesptr;
           uschar *npp = name;  
1639            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1640              *npp++ = 0;
1641            *npp = 0;            *npp = 0;
1642            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
1643            if (n < 0)            if (n < 0)
1644              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1645            else copystrings |= 1 << n;            copynamesptr = npp;
1646            }            }
1647          else if (*p == '+')          else if (*p == '+')
1648            {            {
# Line 1058  while (!done) Line 1680  while (!done)
1680            }            }
1681          continue;          continue;
1682    
1683    #if !defined NODFA
1684            case 'D':
1685    #if !defined NOPOSIX
1686            if (posix || do_posix)
1687              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1688            else
1689    #endif
1690              use_dfa = 1;
1691            continue;
1692    
1693            case 'F':
1694            options |= PCRE_DFA_SHORTEST;
1695            continue;
1696    #endif
1697    
1698          case 'G':          case 'G':
1699          if (isdigit(*p))          if (isdigit(*p))
1700            {            {
# Line 1066  while (!done) Line 1703  while (!done)
1703            }            }
1704          else if (isalnum(*p))          else if (isalnum(*p))
1705            {            {
1706            uschar name[256];            uschar *npp = getnamesptr;
           uschar *npp = name;  
1707            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1708              *npp++ = 0;
1709            *npp = 0;            *npp = 0;
1710            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
1711            if (n < 0)            if (n < 0)
1712              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1713            else getstrings |= 1 << n;            getnamesptr = npp;
1714            }            }
1715          continue;          continue;
1716    
# Line 1095  while (!done) Line 1732  while (!done)
1732            {            {
1733            size_offsets_max = n;            size_offsets_max = n;
1734            free(offsets);            free(offsets);
1735            use_offsets = offsets = malloc(size_offsets_max * sizeof(int));            use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1736            if (offsets == NULL)            if (offsets == NULL)
1737              {              {
1738              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1739                size_offsets_max * sizeof(int));                size_offsets_max * sizeof(int));
1740              return 1;              yield = 1;
1741                goto EXIT;
1742              }              }
1743            }            }
1744          use_size_offsets = n;          use_size_offsets = n;
1745          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1746          continue;          continue;
1747    
1748            case 'P':
1749            options |= PCRE_PARTIAL;
1750            continue;
1751    
1752            case 'Q':
1753            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1754            if (extra == NULL)
1755              {
1756              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1757              extra->flags = 0;
1758              }
1759            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1760            extra->match_limit_recursion = n;
1761            continue;
1762    
1763            case 'q':
1764            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1765            if (extra == NULL)
1766              {
1767              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1768              extra->flags = 0;
1769              }
1770            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1771            extra->match_limit = n;
1772            continue;
1773    
1774    #if !defined NODFA
1775            case 'R':
1776            options |= PCRE_DFA_RESTART;
1777            continue;
1778    #endif
1779    
1780            case 'S':
1781            show_malloc = 1;
1782            continue;
1783    
1784          case 'Z':          case 'Z':
1785          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1786          continue;          continue;
1787    
1788            case '?':
1789            options |= PCRE_NO_UTF8_CHECK;
1790            continue;
1791    
1792            case '<':
1793              {
1794              int x = check_newline(p, outfile);
1795              if (x == 0) goto NEXT_DATA;
1796              options |= x;
1797              while (*p++ != '>');
1798              }
1799            continue;
1800          }          }
1801        *q++ = c;        *q++ = c;
1802        }        }
1803      *q = 0;      *q = 0;
1804      len = q - dbuffer;      len = q - dbuffer;
1805    
1806        if ((all_use_dfa || use_dfa) && find_match_limit)
1807          {
1808          printf("**Match limit not relevant for DFA matching: ignored\n");
1809          find_match_limit = 0;
1810          }
1811    
1812      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1813      support timing or playing with the match limit or callout data. */      support timing or playing with the match limit or callout data. */
1814    
# Line 1126  while (!done) Line 1819  while (!done)
1819        int eflags = 0;        int eflags = 0;
1820        regmatch_t *pmatch = NULL;        regmatch_t *pmatch = NULL;
1821        if (use_size_offsets > 0)        if (use_size_offsets > 0)
1822          pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1823        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1824        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1825    
# Line 1134  while (!done) Line 1827  while (!done)
1827    
1828        if (rc != 0)        if (rc != 0)
1829          {          {
1830          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1831          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1832          }          }
1833          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1834                  != 0)
1835            {
1836            fprintf(outfile, "Matched with REG_NOSUB\n");
1837            }
1838        else        else
1839          {          {
1840          size_t i;          size_t i;
# Line 1173  while (!done) Line 1871  while (!done)
1871          register int i;          register int i;
1872          clock_t time_taken;          clock_t time_taken;
1873          clock_t start_time = clock();          clock_t start_time = clock();
1874    
1875    #if !defined NODFA
1876            if (all_use_dfa || use_dfa)
1877              {
1878              int workspace[1000];
1879              for (i = 0; i < LOOPREPEAT; i++)
1880                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1881                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1882                  sizeof(workspace)/sizeof(int));
1883              }
1884            else
1885    #endif
1886    
1887          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1888            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1889              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1890    
1891          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1892          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1893            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
# Line 1183  while (!done) Line 1895  while (!done)
1895          }          }
1896    
1897        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
1898        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
1899          for the recursion limit. */
1900    
1901        if (find_match_limit)        if (find_match_limit)
1902          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
1903          if (extra == NULL)          if (extra == NULL)
1904            {            {
1905            extra = malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1906            extra->flags = 0;            extra->flags = 0;
1907            }            }
         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;  
1908    
1909          for (;;)          (void)check_match_limit(re, extra, bptr, len, start_offset,
1910            {            options|g_notempty, use_offsets, use_size_offsets,
1911            extra->match_limit = mid;            PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
1912            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,            PCRE_ERROR_MATCHLIMIT, "match()");
1913              options | g_notempty, use_offsets, use_size_offsets);  
1914            if (count == PCRE_ERROR_MATCHLIMIT)          count = check_match_limit(re, extra, bptr, len, start_offset,
1915              {            options|g_notempty, use_offsets, use_size_offsets,
1916              /* fprintf(outfile, "Testing match limit = %d\n", mid); */            PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
1917              min = mid;            PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
   
         extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;  
1918          }          }
1919    
1920        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1232  while (!done) Line 1923  while (!done)
1923          {          {
1924          if (extra == NULL)          if (extra == NULL)
1925            {            {
1926            extra = malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1927            extra->flags = 0;            extra->flags = 0;
1928            }            }
1929          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1930          extra->callout_data = (void *)callout_data;          extra->callout_data = &callout_data;
1931          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1932            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
1933          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
# Line 1245  while (!done) Line 1936  while (!done)
1936        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
1937        value of match_limit. */        value of match_limit. */
1938    
1939        else count = pcre_exec(re, extra, (char *)bptr, len,  #if !defined NODFA
1940          start_offset, options | g_notempty, use_offsets, use_size_offsets);        else if (all_use_dfa || use_dfa)
1941            {
1942            int workspace[1000];
1943            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1944              options | g_notempty, use_offsets, use_size_offsets, workspace,
1945              sizeof(workspace)/sizeof(int));
1946            if (count == 0)
1947              {
1948              fprintf(outfile, "Matched, but too many subsidiary matches\n");
1949              count = use_size_offsets/2;
1950              }
1951            }
1952    #endif
1953    
1954        if (count == 0)        else
1955          {          {
1956          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
1957          count = use_size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
1958            if (count == 0)
1959              {
1960              fprintf(outfile, "Matched, but too many substrings\n");
1961              count = use_size_offsets/3;
1962              }
1963          }          }
1964    
1965        /* Matched */        /* Matched */
# Line 1286  while (!done) Line 1994  while (!done)
1994            {            {
1995            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
1996              {              {
1997              char copybuffer[16];              char copybuffer[256];
1998              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1999                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2000              if (rc < 0)              if (rc < 0)
# Line 1296  while (!done) Line 2004  while (!done)
2004              }              }
2005            }            }
2006    
2007            for (copynamesptr = copynames;
2008                 *copynamesptr != 0;
2009                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2010              {
2011              char copybuffer[256];
2012              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2013                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2014              if (rc < 0)
2015                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2016              else
2017                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2018              }
2019    
2020          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2021            {            {
2022            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1308  while (!done) Line 2029  while (!done)
2029              else              else
2030                {                {
2031                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2032                pcre_free_substring(substring);                pcre_free_substring(substring);
2033                }                }
2034              }              }
2035            }            }
2036    
2037            for (getnamesptr = getnames;
2038                 *getnamesptr != 0;
2039                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2040              {
2041              const char *substring;
2042              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2043                count, (char *)getnamesptr, &substring);
2044              if (rc < 0)
2045                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2046              else
2047                {
2048                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2049                pcre_free_substring(substring);
2050                }
2051              }
2052    
2053          if (getlist)          if (getlist)
2054            {            {
2055            const char **stringlist;            const char **stringlist;
# Line 1333  while (!done) Line 2069  while (!done)
2069            }            }
2070          }          }
2071    
2072          /* There was a partial match */
2073    
2074          else if (count == PCRE_ERROR_PARTIAL)
2075            {
2076            fprintf(outfile, "Partial match");
2077    #if !defined NODFA
2078            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2079              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2080                bptr + use_offsets[0]);
2081    #endif
2082            fprintf(outfile, "\n");
2083            break;  /* Out of the /g loop */
2084            }
2085    
2086        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2087        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end.
2088        We want to advance the start offset, and continue. Fudge the offset        We want to advance the start offset, and continue. In the case of UTF-8
2089        values to achieve this. We won't be at the end of the string - that        matching, the advance must be one character, not one byte. Fudge the
2090        was checked before setting g_notempty. */        offset values to achieve this. We won't be at the end of the string -
2091          that was checked before setting g_notempty. */
2092    
2093        else        else
2094          {          {
2095          if (g_notempty != 0)          if (g_notempty != 0)
2096            {            {
2097              int onechar = 1;
2098            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2099            use_offsets[1] = start_offset + 1;            if (use_utf8)
2100                {
2101                while (start_offset + onechar < len)
2102                  {
2103                  int tb = bptr[start_offset+onechar];
2104                  if (tb <= 127) break;
2105                  tb &= 0xc0;
2106                  if (tb != 0 && tb != 0xc0) onechar++;
2107                  }
2108                }
2109              use_offsets[1] = start_offset + onechar;
2110            }            }
2111          else          else
2112            {            {
2113            if (gmatched == 0)   /* Error if no previous matches */            if (count == PCRE_ERROR_NOMATCH)
2114              {              {
2115              if (count == -1) fprintf(outfile, "No match\n");              if (gmatched == 0) fprintf(outfile, "No match\n");
               else fprintf(outfile, "Error %d\n", count);  
2116              }              }
2117              else fprintf(outfile, "Error %d\n", count);
2118            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
2119            }            }
2120          }          }
# Line 1387  while (!done) Line 2149  while (!done)
2149          len -= use_offsets[1];          len -= use_offsets[1];
2150          }          }
2151        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2152    
2153        NEXT_DATA: continue;
2154      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2155    
2156    CONTINUE:    CONTINUE:
# Line 1395  while (!done) Line 2159  while (!done)
2159    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2160  #endif  #endif
2161    
2162    if (re != NULL) free(re);    if (re != NULL) new_free(re);
2163    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
2164    if (tables != NULL)    if (tables != NULL)
2165      {      {
2166      free((void *)tables);      new_free((void *)tables);
2167      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2168      }      }
2169    }    }
2170    
2171  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2172  return 0;  
2173    EXIT:
2174    
2175    if (infile != NULL && infile != stdin) fclose(infile);
2176    if (outfile != NULL && outfile != stdout) fclose(outfile);
2177    
2178    free(buffer);
2179    free(dbuffer);
2180    free(pbuffer);
2181    free(offsets);
2182    
2183    return yield;
2184  }  }
2185    
2186  /* End */  /* End of pcretest.c */

Legend:
Removed from v.67  
changed lines
  Added in v.91

  ViewVC Help
Powered by ViewVC 1.1.5