/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 147 by ph10, Mon Apr 16 13:24:37 2007 UTC
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 44  POSSIBILITY OF SUCH DAMAGE.
44  #include <locale.h>  #include <locale.h>
45  #include <errno.h>  #include <errno.h>
46    
 #define PCRE_SPY        /* For Win32 build, import data, not export */  
47    
48  /* We include pcre_internal.h because we need the internal info for displaying  /* A number of things vary for Windows builds. Originally, pcretest opened its
49  the results of pcre_study() and we also need to know about the internal  input and output without "b"; then I was told that "b" was needed in some
50  macros, structures, and other internal data values; pcretest has "inside  environments, so it was added for release 5.0 to both the input and output. (It
51  information" compared to a program that strictly follows the PCRE API. */  makes no difference on Unix-like systems.) Later I was told that it is wrong
52    for the input on Windows. I've now abstracted the modes into two macros that
53    are set here, to make it easier to fiddle with them, and removed "b" from the
54    input mode under Windows. */
55    
56    #if defined(_WIN32) || defined(WIN32)
57    #include <io.h>                /* For _setmode() */
58    #include <fcntl.h>             /* For _O_BINARY */
59    #define INPUT_MODE   "r"
60    #define OUTPUT_MODE  "wb"
61    
62    #else
63    #include <sys/time.h>          /* These two includes are needed */
64    #include <sys/resource.h>      /* for setrlimit(). */
65    #define INPUT_MODE   "rb"
66    #define OUTPUT_MODE  "wb"
67    #endif
68    
69    
70    /* We have to include pcre_internal.h because we need the internal info for
71    displaying the results of pcre_study() and we also need to know about the
72    internal macros, structures, and other internal data values; pcretest has
73    "inside information" compared to a program that strictly follows the PCRE API.
74    
75    Although pcre_internal.h does itself include pcre.h, we explicitly include it
76    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
77    appropriately for an application, not for building PCRE. */
78    
79    #include "pcre.h"
80  #include "pcre_internal.h"  #include "pcre_internal.h"
81    
82  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to the data tables that PCRE uses. So as not to have to keep
# Line 70  symbols to prevent clashes. */ Line 96  symbols to prevent clashes. */
96    
97  /* We also need the pcre_printint() function for printing out compiled  /* We also need the pcre_printint() function for printing out compiled
98  patterns. This function is in a separate file so that it can be included in  patterns. This function is in a separate file so that it can be included in
99  pcre_compile.c when that module is compiled with debugging enabled. */  pcre_compile.c when that module is compiled with debugging enabled.
100    
101    The definition of the macro PRINTABLE, which determines whether to print an
102    output character as-is or as a hex value when showing compiled patterns, is
103    contained in this file. We use it here also, in cases when the locale has not
104    been explicitly changed, so as to get consistent output from systems that
105    differ in their output from isprint() even in the "C" locale. */
106    
107  #include "pcre_printint.src"  #include "pcre_printint.src"
108    
109    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
110    
111    
112  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
113  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 83  Makefile. */ Line 117  Makefile. */
117  #include "pcreposix.h"  #include "pcreposix.h"
118  #endif  #endif
119    
120  /* It is also possible, for the benefit of the version imported into Exim, to  /* It is also possible, for the benefit of the version currently imported into
121  build pcretest without support for UTF8 (define NOUTF8), without the interface  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
122  to the DFA matcher (NODFA), and without the doublecheck of the old "info"  interface to the DFA matcher (NODFA), and without the doublecheck of the old
123  function (define NOINFOCHECK). */  "info" function (define NOINFOCHECK). In fact, we automatically cut out the
124    UTF8 support if PCRE is built without it. */
125    
126    #ifndef SUPPORT_UTF8
127    #ifndef NOUTF8
128    #define NOUTF8
129    #endif
130    #endif
131    
132    
133  /* Other parameters */  /* Other parameters */
# Line 99  function (define NOINFOCHECK). */ Line 140  function (define NOINFOCHECK). */
140  #endif  #endif
141  #endif  #endif
142    
143  #define LOOPREPEAT 500000  /* This is the default loop count for timing. */
   
 #define BUFFER_SIZE 30000  
 #define PBUFFER_SIZE BUFFER_SIZE  
 #define DBUFFER_SIZE BUFFER_SIZE  
144    
145    #define LOOPREPEAT 500000
146    
147  /* Static variables */  /* Static variables */
148    
# Line 115  static int callout_extra; Line 153  static int callout_extra;
153  static int callout_fail_count;  static int callout_fail_count;
154  static int callout_fail_id;  static int callout_fail_id;
155  static int first_callout;  static int first_callout;
156    static int locale_set = 0;
157  static int show_malloc;  static int show_malloc;
158  static int use_utf8;  static int use_utf8;
159  static size_t gotten_store;  static size_t gotten_store;
160    
161    /* The buffers grow automatically if very long input lines are encountered. */
162    
163    static int buffer_size = 50000;
164    static uschar *buffer = NULL;
165    static uschar *dbuffer = NULL;
166  static uschar *pbuffer = NULL;  static uschar *pbuffer = NULL;
167    
168    
169    
170  /*************************************************  /*************************************************
171    *        Read or extend an input line            *
172    *************************************************/
173    
174    /* Input lines are read into buffer, but both patterns and data lines can be
175    continued over multiple input lines. In addition, if the buffer fills up, we
176    want to automatically expand it so as to be able to handle extremely large
177    lines that are needed for certain stress tests. When the input buffer is
178    expanded, the other two buffers must also be expanded likewise, and the
179    contents of pbuffer, which are a copy of the input for callouts, must be
180    preserved (for when expansion happens for a data line). This is not the most
181    optimal way of handling this, but hey, this is just a test program!
182    
183    Arguments:
184      f            the file to read
185      start        where in buffer to start (this *must* be within buffer)
186    
187    Returns:       pointer to the start of new data
188                   could be a copy of start, or could be moved
189                   NULL if no data read and EOF reached
190    */
191    
192    static uschar *
193    extend_inputline(FILE *f, uschar *start)
194    {
195    uschar *here = start;
196    
197    for (;;)
198      {
199      int rlen = buffer_size - (here - buffer);
200    
201      if (rlen > 1000)
202        {
203        int dlen;
204        if (fgets((char *)here, rlen,  f) == NULL)
205          return (here == start)? NULL : start;
206        dlen = (int)strlen((char *)here);
207        if (dlen > 0 && here[dlen - 1] == '\n') return start;
208        here += dlen;
209        }
210    
211      else
212        {
213        int new_buffer_size = 2*buffer_size;
214        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
215        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
216        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
217    
218        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
219          {
220          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
221          exit(1);
222          }
223    
224        memcpy(new_buffer, buffer, buffer_size);
225        memcpy(new_pbuffer, pbuffer, buffer_size);
226    
227        buffer_size = new_buffer_size;
228    
229        start = new_buffer + (start - buffer);
230        here = new_buffer + (here - buffer);
231    
232        free(buffer);
233        free(dbuffer);
234        free(pbuffer);
235    
236        buffer = new_buffer;
237        dbuffer = new_dbuffer;
238        pbuffer = new_pbuffer;
239        }
240      }
241    
242    return NULL;  /* Control never gets here */
243    }
244    
245    
246    
247    
248    
249    
250    
251    /*************************************************
252  *          Read number from string               *  *          Read number from string               *
253  *************************************************/  *************************************************/
254    
255  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
256  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
257  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
258    
259  Arguments:  Arguments:
260    str           string to be converted    str           string to be converted
# Line 159  return(result); Line 284  return(result);
284  and returns the value of the character.  and returns the value of the character.
285    
286  Argument:  Argument:
287    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
288    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
289    
290  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
291             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
292  */  */
293    
294  #if !defined NOUTF8  #if !defined NOUTF8
295    
296  static int  static int
297  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *utf8bytes, int *vptr)
298  {  {
299  int c = *buffer++;  int c = *utf8bytes++;
300  int d = c;  int d = c;
301  int i, j, s;  int i, j, s;
302    
# Line 191  d = (c & utf8_table3[i]) << s; Line 316  d = (c & utf8_table3[i]) << s;
316    
317  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
318    {    {
319    c = *buffer++;    c = *utf8bytes++;
320    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
321    s -= 6;    s -= 6;
322    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 222  and encodes it as a UTF-8 character in 0 Line 347  and encodes it as a UTF-8 character in 0
347    
348  Arguments:  Arguments:
349    cvalue     the character value    cvalue     the character value
350    buffer     pointer to buffer for result - at least 6 bytes long    utf8bytes  pointer to buffer for result - at least 6 bytes long
351    
352  Returns:     number of characters placed in the buffer  Returns:     number of characters placed in the buffer
353  */  */
354    
355    #if !defined NOUTF8
356    
357  static int  static int
358  ord2utf8(int cvalue, uschar *buffer)  ord2utf8(int cvalue, uschar *utf8bytes)
359  {  {
360  register int i, j;  register int i, j;
361  for (i = 0; i < utf8_table1_size; i++)  for (i = 0; i < utf8_table1_size; i++)
362    if (cvalue <= utf8_table1[i]) break;    if (cvalue <= utf8_table1[i]) break;
363  buffer += i;  utf8bytes += i;
364  for (j = i; j > 0; j--)  for (j = i; j > 0; j--)
365   {   {
366   *buffer-- = 0x80 | (cvalue & 0x3f);   *utf8bytes-- = 0x80 | (cvalue & 0x3f);
367   cvalue >>= 6;   cvalue >>= 6;
368   }   }
369  *buffer = utf8_table2[i] | cvalue;  *utf8bytes = utf8_table2[i] | cvalue;
370  return i + 1;  return i + 1;
371  }  }
372    
373    #endif
374    
375    
376    
377  /*************************************************  /*************************************************
# Line 269  while (length-- > 0) Line 398  while (length-- > 0)
398        {        {
399        length -= rc - 1;        length -= rc - 1;
400        p += rc;        p += rc;
401        if (c < 256 && isprint(c))        if (PRINTHEX(c))
402          {          {
403          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
404          yield++;          yield++;
405          }          }
406        else        else
407          {          {
408          int n;          int n = 4;
409          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
410          yield += n;          yield += (n <= 0x000000ff)? 2 :
411                     (n <= 0x00000fff)? 3 :
412                     (n <= 0x0000ffff)? 4 :
413                     (n <= 0x000fffff)? 5 : 6;
414          }          }
415        continue;        continue;
416        }        }
# Line 287  while (length-- > 0) Line 419  while (length-- > 0)
419    
420     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
421    
422    if (isprint(c = *(p++)))    c = *p++;
423      if (PRINTHEX(c))
424      {      {
425      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
426      yield++;      yield++;
# Line 461  if ((rc = pcre_fullinfo(re, study, optio Line 594  if ((rc = pcre_fullinfo(re, study, optio
594  *         Byte flipping function                 *  *         Byte flipping function                 *
595  *************************************************/  *************************************************/
596    
597  static long int  static unsigned long int
598  byteflip(long int value, int n)  byteflip(unsigned long int value, int n)
599  {  {
600  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
601  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
# Line 526  return count; Line 659  return count;
659    
660    
661  /*************************************************  /*************************************************
662    *         Check newline indicator                *
663    *************************************************/
664    
665    /* This is used both at compile and run-time to check for <xxx> escapes, where
666    xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.
667    
668    Arguments:
669      p           points after the leading '<'
670      f           file for error message
671    
672    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
673    */
674    
675    static int
676    check_newline(uschar *p, FILE *f)
677    {
678    if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
679    if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
680    if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
681    if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
682    fprintf(f, "Unknown newline type at: <%s\n", p);
683    return 0;
684    }
685    
686    
687    
688    /*************************************************
689    *             Usage function                     *
690    *************************************************/
691    
/* Write the command-line summary to the standard output. The -dfa line is
omitted when NODFA is defined, and the -p line when NOPOSIX is defined. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input> [<output>]]\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);

#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif

fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n",
      stdout);

#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif

fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
717    
718    
719    
720    /*************************************************
721  *                Main Program                    *  *                Main Program                    *
722  *************************************************/  *************************************************/
723    
# Line 540  int options = 0; Line 732  int options = 0;
732  int study_options = 0;  int study_options = 0;
733  int op = 1;  int op = 1;
734  int timeit = 0;  int timeit = 0;
735    int timeitm = 0;
736  int showinfo = 0;  int showinfo = 0;
737  int showstore = 0;  int showstore = 0;
738  int quiet = 0;  int quiet = 0;
# Line 553  int debug = 0; Line 746  int debug = 0;
746  int done = 0;  int done = 0;
747  int all_use_dfa = 0;  int all_use_dfa = 0;
748  int yield = 0;  int yield = 0;
749    int stack_size;
750    
751  unsigned char *buffer;  /* These vectors store, end-to-end, a list of captured substring names. Assume
752  unsigned char *dbuffer;  that 1024 is plenty long enough for the few names we'll be testing. */
753    
754    uschar copynames[1024];
755    uschar getnames[1024];
756    
757    uschar *copynamesptr;
758    uschar *getnamesptr;
759    
760  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that Electric Fence will check their misuse
761  when I am debugging. */  when I am debugging. They grow automatically when very long lines are read. */
762    
763  buffer = (unsigned char *)malloc(BUFFER_SIZE);  buffer = (unsigned char *)malloc(buffer_size);
764  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);  dbuffer = (unsigned char *)malloc(buffer_size);
765  pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);  pbuffer = (unsigned char *)malloc(buffer_size);
   
 /* The outfile variable is static so that new_malloc can use it. The _setmode()  
 stuff is some magic that I don't understand, but which apparently does good  
 things in Windows. It's related to line terminations.  */  
766    
767  #if defined(_WIN32) || defined(WIN32)  /* The outfile variable is static so that new_malloc can use it. */
 _setmode( _fileno( stdout ), 0x8000 );  
 #endif  /* defined(_WIN32) || defined(WIN32) */  
768    
769  outfile = stdout;  outfile = stdout;
770    
771    /* The following  _setmode() stuff is some Windows magic that tells its runtime
772    library to translate CRLF into a single LF character. At least, that's what
773    I've been told: never having used Windows I take this all on trust. Originally
774    it set 0x8000, but then I was advised that _O_BINARY was better. */
775    
776    #if defined(_WIN32) || defined(WIN32)
777    _setmode( _fileno( stdout ), _O_BINARY );
778    #endif
779    
780  /* Scan options */  /* Scan options */
781    
782  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
# Line 582  while (argc > 1 && argv[op][0] == '-') Line 785  while (argc > 1 && argv[op][0] == '-')
785    
786    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
787      showstore = 1;      showstore = 1;
   else if (strcmp(argv[op], "-t") == 0) timeit = 1;  
788    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
789      else if (strcmp(argv[op], "-b") == 0) debug = 1;
790    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
791    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
792  #if !defined NODFA  #if !defined NODFA
# Line 596  while (argc > 1 && argv[op][0] == '-') Line 799  while (argc > 1 && argv[op][0] == '-')
799      op++;      op++;
800      argc--;      argc--;
801      }      }
802      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
803        {
804        int both = argv[op][2] == 0;
805        int temp;
806        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
807                         *endptr == 0))
808          {
809          timeitm = temp;
810          op++;
811          argc--;
812          }
813        else timeitm = LOOPREPEAT;
814        if (both) timeit = timeitm;
815        }
816      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
817          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
818            *endptr == 0))
819        {
820    #if defined(_WIN32) || defined(WIN32)
821        printf("PCRE: -S not supported on this OS\n");
822        exit(1);
823    #else
824        int rc;
825        struct rlimit rlim;
826        getrlimit(RLIMIT_STACK, &rlim);
827        rlim.rlim_cur = stack_size * 1024 * 1024;
828        rc = setrlimit(RLIMIT_STACK, &rlim);
829        if (rc != 0)
830          {
831        printf("PCRE: setrlimit() failed with error %d\n", rc);
832        exit(1);
833          }
834        op++;
835        argc--;
836    #endif
837        }
838  #if !defined NOPOSIX  #if !defined NOPOSIX
839    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
840  #endif  #endif
# Line 609  while (argc > 1 && argv[op][0] == '-') Line 848  while (argc > 1 && argv[op][0] == '-')
848      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
849      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
850      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
851      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
852          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
853          (rc == -1)? "ANY" : "???");
854      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
855      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
856      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
# Line 620  while (argc > 1 && argv[op][0] == '-') Line 861  while (argc > 1 && argv[op][0] == '-')
861      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %d\n", rc);
862      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
863      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
864      exit(0);      goto EXIT;
865        }
866      else if (strcmp(argv[op], "-help") == 0 ||
867               strcmp(argv[op], "--help") == 0)
868        {
869        usage();
870        goto EXIT;
871      }      }
872    else    else
873      {      {
874      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
875      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
     printf("  -C     show PCRE compile-time options and exit\n");  
     printf("  -d     debug: show compiled code; implies -i\n");  
 #if !defined NODFA  
     printf("  -dfa   force DFA matching for all subjects\n");  
 #endif  
     printf("  -i     show information about compiled pattern\n"  
            "  -m     output memory used information\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store (memory) used information\n"  
            "  -t     time compilation and execution\n");  
876      yield = 1;      yield = 1;
877      goto EXIT;      goto EXIT;
878      }      }
# Line 662  if (offsets == NULL) Line 896  if (offsets == NULL)
896    
897  if (argc > 1)  if (argc > 1)
898    {    {
899    infile = fopen(argv[op], "rb");    infile = fopen(argv[op], INPUT_MODE);
900    if (infile == NULL)    if (infile == NULL)
901      {      {
902      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
# Line 673  if (argc > 1) Line 907  if (argc > 1)
907    
908  if (argc > 2)  if (argc > 2)
909    {    {
910    outfile = fopen(argv[op+1], "wb");    outfile = fopen(argv[op+1], OUTPUT_MODE);
911    if (outfile == NULL)    if (outfile == NULL)
912      {      {
913      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
# Line 713  while (!done) Line 947  while (!done)
947    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
948    int do_study = 0;    int do_study = 0;
949    int do_debug = debug;    int do_debug = debug;
950      int debug_lengths = 1;
951    int do_G = 0;    int do_G = 0;
952    int do_g = 0;    int do_g = 0;
953    int do_showinfo = showinfo;    int do_showinfo = showinfo;
954    int do_showrest = 0;    int do_showrest = 0;
955    int do_flip = 0;    int do_flip = 0;
956    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
957    
958    use_utf8 = 0;    use_utf8 = 0;
959    
960    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
961    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
962    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
963    fflush(outfile);    fflush(outfile);
964    
# Line 735  while (!done) Line 970  while (!done)
970    
971    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
972      {      {
973      unsigned long int magic;      unsigned long int magic, get_options;
974      uschar sbuf[8];      uschar sbuf[8];
975      FILE *f;      FILE *f;
976    
# Line 783  while (!done) Line 1018  while (!done)
1018    
1019      /* Need to know if UTF-8 for printing data strings */      /* Need to know if UTF-8 for printing data strings */
1020    
1021      new_info(re, NULL, PCRE_INFO_OPTIONS, &options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1022      use_utf8 = (options & PCRE_UTF8) != 0;      use_utf8 = (get_options & PCRE_UTF8) != 0;
1023    
1024      /* Now see if there is any following study data */      /* Now see if there is any following study data */
1025    
# Line 828  while (!done) Line 1063  while (!done)
1063      }      }
1064    
1065    pp = p;    pp = p;
1066      poffset = p - buffer;
1067    
1068    for(;;)    for(;;)
1069      {      {
# Line 838  while (!done) Line 1074  while (!done)
1074        pp++;        pp++;
1075        }        }
1076      if (*pp != 0) break;      if (*pp != 0) break;
   
     len = BUFFER_SIZE - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
1077      if (infile == stdin) printf("    > ");      if (infile == stdin) printf("    > ");
1078      if (fgets((char *)pp, len, infile) == NULL)      if ((pp = extend_inputline(infile, pp)) == NULL)
1079        {        {
1080        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1081        done = 1;        done = 1;
# Line 856  while (!done) Line 1084  while (!done)
1084      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1085      }      }
1086    
1087      /* The buffer may have moved while being extended; reset the start of data
1088      pointer to the correct relative point in the buffer. */
1089    
1090      p = buffer + poffset;
1091    
1092    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1093    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1094    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
# Line 887  while (!done) Line 1120  while (!done)
1120    
1121        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1122        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1123          case 'B': do_debug = 1; break;
1124        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
1125        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1126        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1127        case 'F': do_flip = 1; break;        case 'F': do_flip = 1; break;
1128        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1129        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1130          case 'J': options |= PCRE_DUPNAMES; break;
1131        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1132        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1133    
# Line 903  while (!done) Line 1138  while (!done)
1138        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1139        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1140        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1141          case 'Z': debug_lengths = 0; break;
1142        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1143        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1144    
1145        case 'L':        case 'L':
1146        ppp = pp;        ppp = pp;
1147        /* The '\r' test here is so that it works on Windows */        /* The '\r' test here is so that it works on Windows. */
1148        while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;        /* The '0' test is just in case this is an unterminated line. */
1149          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1150        *ppp = 0;        *ppp = 0;
1151        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1152          {          {
1153          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1154          goto SKIP_DATA;          goto SKIP_DATA;
1155          }          }
1156          locale_set = 1;
1157        tables = pcre_maketables();        tables = pcre_maketables();
1158        pp = ppp;        pp = ppp;
1159        break;        break;
# Line 927  while (!done) Line 1165  while (!done)
1165        *pp = 0;        *pp = 0;
1166        break;        break;
1167    
1168          case '<':
1169            {
1170            int x = check_newline(pp, outfile);
1171            if (x == 0) goto SKIP_DATA;
1172            options |= x;
1173            while (*pp++ != '>');
1174            }
1175          break;
1176    
1177        case '\r':                      /* So that it works in Windows */        case '\r':                      /* So that it works in Windows */
1178        case '\n':        case '\n':
1179        case ' ':        case ' ':
# Line 961  while (!done) Line 1208  while (!done)
1208    
1209      if (rc != 0)      if (rc != 0)
1210        {        {
1211        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1212        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1213        goto SKIP_DATA;        goto SKIP_DATA;
1214        }        }
# Line 973  while (!done) Line 1220  while (!done)
1220  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1221    
1222      {      {
1223      if (timeit)      if (timeit > 0)
1224        {        {
1225        register int i;        register int i;
1226        clock_t time_taken;        clock_t time_taken;
1227        clock_t start_time = clock();        clock_t start_time = clock();
1228        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1229          {          {
1230          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1231          if (re != NULL) free(re);          if (re != NULL) free(re);
1232          }          }
1233        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1234        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1235          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1236            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1237        }        }
1238    
# Line 1002  while (!done) Line 1249  while (!done)
1249          {          {
1250          for (;;)          for (;;)
1251            {            {
1252            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)            if (extend_inputline(infile, buffer) == NULL)
1253              {              {
1254              done = 1;              done = 1;
1255              goto CONTINUE;              goto CONTINUE;
# Line 1037  while (!done) Line 1284  while (!done)
1284    
1285      if (do_study)      if (do_study)
1286        {        {
1287        if (timeit)        if (timeit > 0)
1288          {          {
1289          register int i;          register int i;
1290          clock_t time_taken;          clock_t time_taken;
1291          clock_t start_time = clock();          clock_t start_time = clock();
1292          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
1293            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
1294          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1295          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
1296          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
1297            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeit) /
1298              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
1299          }          }
1300        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 1090  while (!done) Line 1337  while (!done)
1337    
1338      SHOW_INFO:      SHOW_INFO:
1339    
1340        if (do_debug)
1341          {
1342          fprintf(outfile, "------------------------------------------------------------------\n");
1343          pcre_printint(re, outfile, debug_lengths);
1344          }
1345    
1346      if (do_showinfo)      if (do_showinfo)
1347        {        {
1348        unsigned long int get_options, all_options;        unsigned long int get_options, all_options;
# Line 1100  while (!done) Line 1353  while (!done)
1353        int nameentrysize, namecount;        int nameentrysize, namecount;
1354        const uschar *nametable;        const uschar *nametable;
1355    
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         pcre_printint(re, outfile);  
         }  
   
1356        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1357        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1358        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
# Line 1163  while (!done) Line 1410  while (!done)
1410        if (do_flip)        if (do_flip)
1411          {          {
1412          all_options = byteflip(all_options, sizeof(all_options));          all_options = byteflip(all_options, sizeof(all_options));
1413          }           }
1414    
1415        if ((all_options & PCRE_NOPARTIAL) != 0)        if ((all_options & PCRE_NOPARTIAL) != 0)
1416          fprintf(outfile, "Partial matching not supported\n");          fprintf(outfile, "Partial matching not supported\n");
1417    
1418        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1419          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1420            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1421            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1422            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 1181  while (!done) Line 1428  while (!done)
1428            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1429            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1430            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1431            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1432              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1433    
1434        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        switch (get_options & PCRE_NEWLINE_BITS)
1435          fprintf(outfile, "Case state changes\n");          {
1436            case PCRE_NEWLINE_CR:
1437            fprintf(outfile, "Forced newline sequence: CR\n");
1438            break;
1439    
1440            case PCRE_NEWLINE_LF:
1441            fprintf(outfile, "Forced newline sequence: LF\n");
1442            break;
1443    
1444            case PCRE_NEWLINE_CRLF:
1445            fprintf(outfile, "Forced newline sequence: CRLF\n");
1446            break;
1447    
1448            case PCRE_NEWLINE_ANY:
1449            fprintf(outfile, "Forced newline sequence: ANY\n");
1450            break;
1451    
1452            default:
1453            break;
1454            }
1455    
1456        if (first_char == -1)        if (first_char == -1)
1457          {          {
1458          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1459          }          }
1460        else if (first_char < 0)        else if (first_char < 0)
1461          {          {
# Line 1199  while (!done) Line 1466  while (!done)
1466          int ch = first_char & 255;          int ch = first_char & 255;
1467          const char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1468            "" : " (caseless)";            "" : " (caseless)";
1469          if (isprint(ch))          if (PRINTHEX(ch))
1470            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1471          else          else
1472            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 1214  while (!done) Line 1481  while (!done)
1481          int ch = need_char & 255;          int ch = need_char & 255;
1482          const char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1483            "" : " (caseless)";            "" : " (caseless)";
1484          if (isprint(ch))          if (PRINTHEX(ch))
1485            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1486          else          else
1487            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
# Line 1250  while (!done) Line 1517  while (!done)
1517                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
1518                    c = 2;                    c = 2;
1519                    }                    }
1520                  if (isprint(i) && i != ' ')                  if (PRINTHEX(i) && i != ' ')
1521                    {                    {
1522                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
1523                    c += 2;                    c += 2;
# Line 1309  while (!done) Line 1576  while (!done)
1576                  strerror(errno));                  strerror(errno));
1577                }                }
1578              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
1579    
1580              }              }
1581            }            }
1582          fclose(f);          fclose(f);
# Line 1326  while (!done) Line 1594  while (!done)
1594    for (;;)    for (;;)
1595      {      {
1596      uschar *q;      uschar *q;
1597      uschar *bptr = dbuffer;      uschar *bptr;
1598      int *use_offsets = offsets;      int *use_offsets = offsets;
1599      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1600      int callout_data = 0;      int callout_data = 0;
# Line 1343  while (!done) Line 1611  while (!done)
1611    
1612      options = 0;      options = 0;
1613    
1614        *copynames = 0;
1615        *getnames = 0;
1616    
1617        copynamesptr = copynames;
1618        getnamesptr = getnames;
1619    
1620      pcre_callout = callout;      pcre_callout = callout;
1621      first_callout = 1;      first_callout = 1;
1622      callout_extra = 0;      callout_extra = 0;
# Line 1351  while (!done) Line 1625  while (!done)
1625      callout_fail_id = -1;      callout_fail_id = -1;
1626      show_malloc = 0;      show_malloc = 0;
1627    
1628      if (infile == stdin) printf("data> ");      if (extra != NULL) extra->flags &=
1629      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1630    
1631        len = 0;
1632        for (;;)
1633        {        {
1634        done = 1;        if (infile == stdin) printf("data> ");
1635        goto CONTINUE;        if (extend_inputline(infile, buffer + len) == NULL)
1636            {
1637            if (len > 0) break;
1638            done = 1;
1639            goto CONTINUE;
1640            }
1641          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1642          len = (int)strlen((char *)buffer);
1643          if (buffer[len-1] == '\n') break;
1644        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1645    
     len = (int)strlen((char *)buffer);  
1646      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1647      buffer[len] = 0;      buffer[len] = 0;
1648      if (len == 0) break;      if (len == 0) break;
# Line 1367  while (!done) Line 1650  while (!done)
1650      p = buffer;      p = buffer;
1651      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1652    
1653      q = dbuffer;      bptr = q = dbuffer;
1654      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1655        {        {
1656        int i = 0;        int i = 0;
# Line 1389  while (!done) Line 1672  while (!done)
1672          c -= '0';          c -= '0';
1673          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1674            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1675    
1676    #if !defined NOUTF8
1677            if (use_utf8 && c > 255)
1678              {
1679              unsigned char buff8[8];
1680              int ii, utn;
1681              utn = ord2utf8(c, buff8);
1682              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1683              c = buff8[ii];   /* Last byte */
1684              }
1685    #endif
1686          break;          break;
1687    
1688          case 'x':          case 'x':
# Line 1450  while (!done) Line 1744  while (!done)
1744            }            }
1745          else if (isalnum(*p))          else if (isalnum(*p))
1746            {            {
1747            uschar name[256];            uschar *npp = copynamesptr;
           uschar *npp = name;  
1748            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1749              *npp++ = 0;
1750            *npp = 0;            *npp = 0;
1751            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
1752            if (n < 0)            if (n < 0)
1753              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1754            else copystrings |= 1 << n;            copynamesptr = npp;
1755            }            }
1756          else if (*p == '+')          else if (*p == '+')
1757            {            {
# Line 1518  while (!done) Line 1812  while (!done)
1812            }            }
1813          else if (isalnum(*p))          else if (isalnum(*p))
1814            {            {
1815            uschar name[256];            uschar *npp = getnamesptr;
           uschar *npp = name;  
1816            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1817              *npp++ = 0;
1818            *npp = 0;            *npp = 0;
1819            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
1820            if (n < 0)            if (n < 0)
1821              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1822            else getstrings |= 1 << n;            getnamesptr = npp;
1823            }            }
1824          continue;          continue;
1825    
# Line 1564  while (!done) Line 1858  while (!done)
1858          options |= PCRE_PARTIAL;          options |= PCRE_PARTIAL;
1859          continue;          continue;
1860    
1861            case 'Q':
1862            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1863            if (extra == NULL)
1864              {
1865              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1866              extra->flags = 0;
1867              }
1868            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1869            extra->match_limit_recursion = n;
1870            continue;
1871    
1872            case 'q':
1873            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1874            if (extra == NULL)
1875              {
1876              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1877              extra->flags = 0;
1878              }
1879            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1880            extra->match_limit = n;
1881            continue;
1882    
1883  #if !defined NODFA  #if !defined NODFA
1884          case 'R':          case 'R':
1885          options |= PCRE_DFA_RESTART;          options |= PCRE_DFA_RESTART;
# Line 1581  while (!done) Line 1897  while (!done)
1897          case '?':          case '?':
1898          options |= PCRE_NO_UTF8_CHECK;          options |= PCRE_NO_UTF8_CHECK;
1899          continue;          continue;
1900    
1901            case '<':
1902              {
1903              int x = check_newline(p, outfile);
1904              if (x == 0) goto NEXT_DATA;
1905              options |= x;
1906              while (*p++ != '>');
1907              }
1908            continue;
1909          }          }
1910        *q++ = c;        *q++ = c;
1911        }        }
# Line 1611  while (!done) Line 1936  while (!done)
1936    
1937        if (rc != 0)        if (rc != 0)
1938          {          {
1939          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1940          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1941          }          }
1942        else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)        else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
# Line 1650  while (!done) Line 1975  while (!done)
1975    
1976      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
1977        {        {
1978        if (timeit)        if (timeitm > 0)
1979          {          {
1980          register int i;          register int i;
1981          clock_t time_taken;          clock_t time_taken;
# Line 1660  while (!done) Line 1985  while (!done)
1985          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
1986            {            {
1987            int workspace[1000];            int workspace[1000];
1988            for (i = 0; i < LOOPREPEAT; i++)            for (i = 0; i < timeitm; i++)
1989              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1990                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
1991                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
# Line 1668  while (!done) Line 1993  while (!done)
1993          else          else
1994  #endif  #endif
1995    
1996          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeitm; i++)
1997            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1998              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1999    
2000          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2001          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2002            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
2003              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2004          }          }
2005    
# Line 1690  while (!done) Line 2015  while (!done)
2015            extra->flags = 0;            extra->flags = 0;
2016            }            }
2017    
2018          count = check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
2019            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
2020            PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),            PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2021            PCRE_ERROR_MATCHLIMIT, "match()");            PCRE_ERROR_MATCHLIMIT, "match()");
# Line 1750  while (!done) Line 2075  while (!done)
2075    
2076        if (count >= 0)        if (count >= 0)
2077          {          {
2078          int i;          int i, maxcount;
2079    
2080    #if !defined NODFA
2081            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2082    #endif
2083              maxcount = use_size_offsets/3;
2084    
2085            /* This is a check against a lunatic return value. */
2086    
2087            if (count > maxcount)
2088              {
2089              fprintf(outfile,
2090                "** PCRE error: returned count %d is too big for offset size %d\n",
2091                count, use_size_offsets);
2092              count = use_size_offsets/3;
2093              if (do_g || do_G)
2094                {
2095                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2096                do_g = do_G = FALSE;        /* Break g/G loop */
2097                }
2098              }
2099    
2100          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2101            {            {
2102            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 1778  while (!done) Line 2124  while (!done)
2124            {            {
2125            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2126              {              {
2127              char copybuffer[16];              char copybuffer[256];
2128              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2129                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2130              if (rc < 0)              if (rc < 0)
# Line 1788  while (!done) Line 2134  while (!done)
2134              }              }
2135            }            }
2136    
2137            for (copynamesptr = copynames;
2138                 *copynamesptr != 0;
2139                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2140              {
2141              char copybuffer[256];
2142              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2143                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2144              if (rc < 0)
2145                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2146              else
2147                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2148              }
2149    
2150          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2151            {            {
2152            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1800  while (!done) Line 2159  while (!done)
2159              else              else
2160                {                {
2161                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2162                pcre_free_substring(substring);                pcre_free_substring(substring);
2163                }                }
2164              }              }
2165            }            }
2166    
2167            for (getnamesptr = getnames;
2168                 *getnamesptr != 0;
2169                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2170              {
2171              const char *substring;
2172              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2173                count, (char *)getnamesptr, &substring);
2174              if (rc < 0)
2175                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2176              else
2177                {
2178                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2179                pcre_free_substring(substring);
2180                }
2181              }
2182    
2183          if (getlist)          if (getlist)
2184            {            {
2185            const char **stringlist;            const char **stringlist;
# Line 1840  while (!done) Line 2214  while (!done)
2214          }          }
2215    
2216        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2217        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2218        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2219        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2220        offset values to achieve this. We won't be at the end of the string -  
2221        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any".
2222          If the previous match was at the end of a line terminated by CRLF, an
2223          advance of one character just passes the \r, whereas we should prefer the
2224          longer newline sequence, as does the code in pcre_exec(). Fudge the
2225          offset value to achieve this.
2226    
2227          Otherwise, in the case of UTF-8 matching, the advance must be one
2228          character, not one byte. */
2229    
2230        else        else
2231          {          {
2232          if (g_notempty != 0)          if (g_notempty != 0)
2233            {            {
2234            int onechar = 1;            int onechar = 1;
2235              unsigned int obits = ((real_pcre *)re)->options;
2236            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2237            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2238                {
2239                int d;
2240                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2241                obits = (d == '\r')? PCRE_NEWLINE_CR :
2242                        (d == '\n')? PCRE_NEWLINE_LF :
2243                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2244                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2245                }
2246              if ((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY &&
2247                  start_offset < len - 1 &&
2248                  bptr[start_offset] == '\r' &&
2249                  bptr[start_offset+1] == '\n')
2250                onechar++;
2251              else if (use_utf8)
2252              {              {
2253              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2254                {                {
# Line 1887  while (!done) Line 2283  while (!done)
2283        character. */        character. */
2284    
2285        g_notempty = 0;        g_notempty = 0;
2286    
2287        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2288          {          {
2289          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
# Line 1905  while (!done) Line 2302  while (!done)
2302          len -= use_offsets[1];          len -= use_offsets[1];
2303          }          }
2304        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2305    
2306        NEXT_DATA: continue;
2307      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2308    
2309    CONTINUE:    CONTINUE:
# Line 1919  while (!done) Line 2318  while (!done)
2318      {      {
2319      new_free((void *)tables);      new_free((void *)tables);
2320      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2321        locale_set = 0;
2322      }      }
2323    }    }
2324    

Legend:
Removed from v.87  
changed lines
  Added in v.147

  ViewVC Help
Powered by ViewVC 1.1.5