/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 67 by nigel, Sat Feb 24 21:40:13 2007 UTC revision 79 by nigel, Sat Feb 24 21:40:52 2007 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather untidy in places. */  been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39  #include <ctype.h>  #include <ctype.h>
40  #include <stdio.h>  #include <stdio.h>
# Line 12  been extended and consequently is now ra Line 42  been extended and consequently is now ra
42  #include <stdlib.h>  #include <stdlib.h>
43  #include <time.h>  #include <time.h>
44  #include <locale.h>  #include <locale.h>
45    #include <errno.h>
 /* We need the internal info for displaying the results of pcre_study(). Also  
 for getting the opcodes for showing compiled code. */  
46    
47  #define PCRE_SPY        /* For Win32 build, import data, not export */  #define PCRE_SPY        /* For Win32 build, import data, not export */
48  #include "internal.h"  
49    /* We need the internal info for displaying the results of pcre_study() and
50    other internal data; pcretest also uses some of the fixed tables, and generally
51    has "inside information" compared to a program that strictly follows the PCRE
52    API. */
53    
54    #include "pcre_internal.h"
55    
56    
57  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
58  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 27  Makefile. */ Line 62  Makefile. */
62  #include "pcreposix.h"  #include "pcreposix.h"
63  #endif  #endif
64    
65    /* It is also possible, for the benefit of the version imported into Exim, to
66    build pcretest without support for UTF8 (define NOUTF8), without the interface
67    to the DFA matcher (NODFA), and without the doublecheck of the old "info"
68    function (define NOINFOCHECK). */
69    
70    
71  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
72  #ifdef CLK_TCK  #ifdef CLK_TCK
73  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 35  Makefile. */ Line 76  Makefile. */
76  #endif  #endif
77  #endif  #endif
78    
79  #define LOOPREPEAT 50000  #define LOOPREPEAT 500000
80    
81    #define BUFFER_SIZE 30000
82    #define PBUFFER_SIZE BUFFER_SIZE
83    #define DBUFFER_SIZE BUFFER_SIZE
84    
85    
86  static FILE *outfile;  static FILE *outfile;
# Line 45  static int callout_extra; Line 90  static int callout_extra;
90  static int callout_fail_count;  static int callout_fail_count;
91  static int callout_fail_id;  static int callout_fail_id;
92  static int first_callout;  static int first_callout;
93    static int show_malloc;
94  static int use_utf8;  static int use_utf8;
95  static size_t gotten_store;  static size_t gotten_store;
96    
97    static uschar *pbuffer = NULL;
   
 static int utf8_table1[] = {  
   0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  
   
 static int utf8_table2[] = {  
   0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  
   
 static int utf8_table3[] = {  
   0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  
   
   
   
 /*************************************************  
 *         Print compiled regex                   *  
 *************************************************/  
   
 /* The code for doing this is held in a separate file that is also included in  
 pcre.c when it is compiled with the debug switch. It defines a function called  
 print_internals(), which uses a table of opcode lengths defined by the macro  
 OP_LENGTHS, whose name must be OP_lengths. */  
   
 static uschar OP_lengths[] = { OP_LENGTHS };  
   
 #include "printint.c"  
98    
99    
100    
# Line 103  return(result); Line 125  return(result);
125    
126    
127    
 /*************************************************  
 *       Convert character value to UTF-8         *  
 *************************************************/  
   
 /* This function takes an integer value in the range 0 - 0x7fffffff  
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   buffer     pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
 */  
   
 static int  
 ord2utf8(int cvalue, unsigned char *buffer)  
 {  
 register int i, j;  
 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  
   if (cvalue <= utf8_table1[i]) break;  
 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  
 if (cvalue < 0) return -1;  
   
 buffer += i;  
 for (j = i; j > 0; j--)  
  {  
  *buffer-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *buffer = utf8_table2[i] | cvalue;  
 return i + 1;  
 }  
   
128    
129  /*************************************************  /*************************************************
130  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
# Line 155  Returns:   >  0 => the number of bytes c Line 141  Returns:   >  0 => the number of bytes c
141             -6 to 0 => malformed UTF-8 character at offset = (-return)             -6 to 0 => malformed UTF-8 character at offset = (-return)
142  */  */
143    
144    #if !defined NOUTF8
145    
146  static int  static int
147  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *buffer, int *vptr)
148  {  {
# Line 174  if (i == 0 || i == 6) return 0;        / Line 162  if (i == 0 || i == 6) return 0;        /
162  /* i now has a value in the range 1-5 */  /* i now has a value in the range 1-5 */
163    
164  s = 6*i;  s = 6*i;
165  d = (c & utf8_table3[i]) << s;  d = (c & _pcre_utf8_table3[i]) << s;
166    
167  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
168    {    {
# Line 186  for (j = 0; j < i; j++) Line 174  for (j = 0; j < i; j++)
174    
175  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
176    
177  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < _pcre_utf8_table1_size; j++)
178    if (d <= utf8_table1[j]) break;    if (d <= _pcre_utf8_table1[j]) break;
179  if (j != i) return -(i+1);  if (j != i) return -(i+1);
180    
181  /* Valid value */  /* Valid value */
# Line 196  if (j != i) return -(i+1); Line 184  if (j != i) return -(i+1);
184  return i+1;  return i+1;
185  }  }
186    
187    #endif
188    
189    
190    
191  /*************************************************  /*************************************************
# Line 213  int yield = 0; Line 203  int yield = 0;
203    
204  while (length-- > 0)  while (length-- > 0)
205    {    {
206    #if !defined NOUTF8
207    if (use_utf8)    if (use_utf8)
208      {      {
209      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
# Line 235  while (length-- > 0) Line 226  while (length-- > 0)
226        continue;        continue;
227        }        }
228      }      }
229    #endif
230    
231     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
232    
# Line 266  data is not zero. */ Line 258  data is not zero. */
258  static int callout(pcre_callout_block *cb)  static int callout(pcre_callout_block *cb)
259  {  {
260  FILE *f = (first_callout | callout_extra)? outfile : NULL;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
261  int i, pre_start, post_start;  int i, pre_start, post_start, subject_length;
262    
263  if (callout_extra)  if (callout_extra)
264    {    {
# Line 297  pre_start = pchars((unsigned char *)cb-> Line 289  pre_start = pchars((unsigned char *)cb->
289  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
290    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
291    
292    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
293    
294  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  (void)pchars((unsigned char *)(cb->subject + cb->current_position),
295    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
296    
297  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
298    
299  /* Always print appropriate indicators, with callout number if not already  /* Always print appropriate indicators, with callout number if not already
300  shown */  shown. For automatic callouts, show the pattern offset. */
301    
302  if (callout_extra) fprintf(outfile, "    ");  if (cb->callout_number == 255)
303    else fprintf(outfile, "%3d ", cb->callout_number);    {
304      fprintf(outfile, "%+3d ", cb->pattern_position);
305      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
306      }
307    else
308      {
309      if (callout_extra) fprintf(outfile, "    ");
310        else fprintf(outfile, "%3d ", cb->callout_number);
311      }
312    
313  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
314  fprintf(outfile, "^");  fprintf(outfile, "^");
# Line 317  if (post_start > 0) Line 319  if (post_start > 0)
319    fprintf(outfile, "^");    fprintf(outfile, "^");
320    }    }
321    
322  fprintf(outfile, "\n");  for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
323      fprintf(outfile, " ");
324    
325    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
326      pbuffer + cb->pattern_position);
327    
328    fprintf(outfile, "\n");
329  first_callout = 0;  first_callout = 0;
330    
331  if ((int)(cb->callout_data) != 0)  if (cb->callout_data != NULL)
332    {    {
333    fprintf(outfile, "Callout data = %d\n", (int)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
334    return (int)(cb->callout_data);    if (callout_data != 0)
335        {
336        fprintf(outfile, "Callout data = %d\n", callout_data);
337        return callout_data;
338        }
339    }    }
340    
341  return (cb->callout_number != callout_fail_id)? 0 :  return (cb->callout_number != callout_fail_id)? 0 :
# Line 333  return (cb->callout_number != callout_fa Line 344  return (cb->callout_number != callout_fa
344    
345    
346  /*************************************************  /*************************************************
347  *            Local malloc function               *  *            Local malloc functions              *
348  *************************************************/  *************************************************/
349    
350  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
# Line 341  compiled re. */ Line 352  compiled re. */
352    
353  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
354  {  {
355    void *block = malloc(size);
356  gotten_store = size;  gotten_store = size;
357  return malloc(size);  if (show_malloc)
358      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
359    return block;
360  }  }
361    
362    static void new_free(void *block)
363    {
364    if (show_malloc)
365      fprintf(outfile, "free             %p\n", block);
366    free(block);
367    }
368    
369    
370    /* For recursion malloc/free, to test stacking calls */
371    
372    static void *stack_malloc(size_t size)
373    {
374    void *block = malloc(size);
375    if (show_malloc)
376      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
377    return block;
378    }
379    
380    static void stack_free(void *block)
381    {
382    if (show_malloc)
383      fprintf(outfile, "stack_free       %p\n", block);
384    free(block);
385    }
386    
387    
388  /*************************************************  /*************************************************
# Line 363  if ((rc = pcre_fullinfo(re, study, optio Line 401  if ((rc = pcre_fullinfo(re, study, optio
401    
402    
403  /*************************************************  /*************************************************
404    *         Byte flipping function                 *
405    *************************************************/
406    
407    static long int
408    byteflip(long int value, int n)
409    {
410    if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
411    return ((value & 0x000000ff) << 24) |
412           ((value & 0x0000ff00) <<  8) |
413           ((value & 0x00ff0000) >>  8) |
414           ((value & 0xff000000) >> 24);
415    }
416    
417    
418    
419    
420    /*************************************************
421  *                Main Program                    *  *                Main Program                    *
422  *************************************************/  *************************************************/
423    
# Line 381  int showinfo = 0; Line 436  int showinfo = 0;
436  int showstore = 0;  int showstore = 0;
437  int size_offsets = 45;  int size_offsets = 45;
438  int size_offsets_max;  int size_offsets_max;
439  int *offsets;  int *offsets = NULL;
440  #if !defined NOPOSIX  #if !defined NOPOSIX
441  int posix = 0;  int posix = 0;
442  #endif  #endif
443  int debug = 0;  int debug = 0;
444  int done = 0;  int done = 0;
445  unsigned char buffer[30000];  int all_use_dfa = 0;
446  unsigned char dbuffer[1024];  int yield = 0;
447    
448  /* Static so that new_malloc can use it. */  unsigned char *buffer;
449    unsigned char *dbuffer;
450    
451    /* Get buffers from malloc() so that Electric Fence will check their misuse
452    when I am debugging. */
453    
454    buffer = (unsigned char *)malloc(BUFFER_SIZE);
455    dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
456    pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
457    
458    /* The outfile variable is static so that new_malloc can use it. The _setmode()
459    stuff is some magic that I don't understand, but which apparently does good
460    things in Windows. It's related to line terminations.  */
461    
462    #if defined(_WIN32) || defined(WIN32)
463    _setmode( _fileno( stdout ), 0x8000 );
464    #endif  /* defined(_WIN32) || defined(WIN32) */
465    
466  outfile = stdout;  outfile = stdout;
467    
# Line 405  while (argc > 1 && argv[op][0] == '-') Line 476  while (argc > 1 && argv[op][0] == '-')
476    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
477    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
478    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
479    #if !defined NODFA
480      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
481    #endif
482    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
483        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
484          *endptr == 0))          *endptr == 0))
# Line 422  while (argc > 1 && argv[op][0] == '-') Line 496  while (argc > 1 && argv[op][0] == '-')
496      printf("Compiled with\n");      printf("Compiled with\n");
497      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
498      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
499        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
500        printf("  %sUnicode properties support\n", rc? "" : "No ");
501      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
502      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
503      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
# Line 430  while (argc > 1 && argv[op][0] == '-') Line 506  while (argc > 1 && argv[op][0] == '-')
506      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
507      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
508      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %d\n", rc);
509        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
510        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
511      exit(0);      exit(0);
512      }      }
513    else    else
# Line 437  while (argc > 1 && argv[op][0] == '-') Line 515  while (argc > 1 && argv[op][0] == '-')
515      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
516      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
517      printf("  -C     show PCRE compile-time options and exit\n");      printf("  -C     show PCRE compile-time options and exit\n");
518      printf("  -d     debug: show compiled code; implies -i\n"      printf("  -d     debug: show compiled code; implies -i\n");
519             "  -i     show information about compiled pattern\n"  #if !defined NODFA
520        printf("  -dfa   force DFA matching for all subjects\n");
521    #endif
522        printf("  -i     show information about compiled pattern\n"
523               "  -m     output memory used information\n"
524             "  -o <n> set size of offsets vector to <n>\n");             "  -o <n> set size of offsets vector to <n>\n");
525  #if !defined NOPOSIX  #if !defined NOPOSIX
526      printf("  -p     use POSIX interface\n");      printf("  -p     use POSIX interface\n");
527  #endif  #endif
528      printf("  -s     output store information\n"      printf("  -s     output store (memory) used information\n"
529             "  -t     time compilation and execution\n");             "  -t     time compilation and execution\n");
530      return 1;      yield = 1;
531        goto EXIT;
532      }      }
533    op++;    op++;
534    argc--;    argc--;
# Line 454  while (argc > 1 && argv[op][0] == '-') Line 537  while (argc > 1 && argv[op][0] == '-')
537  /* Get the store for the offsets vector, and remember what it was */  /* Get the store for the offsets vector, and remember what it was */
538    
539  size_offsets_max = size_offsets;  size_offsets_max = size_offsets;
540  offsets = malloc(size_offsets_max * sizeof(int));  offsets = (int *)malloc(size_offsets_max * sizeof(int));
541  if (offsets == NULL)  if (offsets == NULL)
542    {    {
543    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
544      size_offsets_max * sizeof(int));      size_offsets_max * sizeof(int));
545    return 1;    yield = 1;
546      goto EXIT;
547    }    }
548    
549  /* Sort out the input and output files */  /* Sort out the input and output files */
550    
551  if (argc > 1)  if (argc > 1)
552    {    {
553    infile = fopen(argv[op], "r");    infile = fopen(argv[op], "rb");
554    if (infile == NULL)    if (infile == NULL)
555      {      {
556      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
557      return 1;      yield = 1;
558        goto EXIT;
559      }      }
560    }    }
561    
562  if (argc > 2)  if (argc > 2)
563    {    {
564    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], "wb");
565    if (outfile == NULL)    if (outfile == NULL)
566      {      {
567      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
568      return 1;      yield = 1;
569        goto EXIT;
570      }      }
571    }    }
572    
573  /* Set alternative malloc function */  /* Set alternative malloc function */
574    
575  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
576    pcre_free = new_free;
577    pcre_stack_malloc = stack_malloc;
578    pcre_stack_free = stack_free;
579    
580  /* Heading line, then prompt for first regex if stdin */  /* Heading line, then prompt for first regex if stdin */
581    
# Line 506  while (!done) Line 595  while (!done)
595    
596    const char *error;    const char *error;
597    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
598      unsigned char *to_file = NULL;
599    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
600      unsigned long int true_size, true_study_size = 0;
601      size_t size, regex_gotten_store;
602    int do_study = 0;    int do_study = 0;
603    int do_debug = debug;    int do_debug = debug;
604    int do_G = 0;    int do_G = 0;
605    int do_g = 0;    int do_g = 0;
606    int do_showinfo = showinfo;    int do_showinfo = showinfo;
607    int do_showrest = 0;    int do_showrest = 0;
608      int do_flip = 0;
609    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
610    
611    use_utf8 = 0;    use_utf8 = 0;
612    
613    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
614    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
615    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
616    fflush(outfile);    fflush(outfile);
617    
# Line 526  while (!done) Line 619  while (!done)
619    while (isspace(*p)) p++;    while (isspace(*p)) p++;
620    if (*p == 0) continue;    if (*p == 0) continue;
621    
622    /* Get the delimiter and seek the end of the pattern; if it isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
623    complete, read more. */  
624      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
625        {
626        unsigned long int magic;
627        uschar sbuf[8];
628        FILE *f;
629    
630        p++;
631        pp = p + (int)strlen((char *)p);
632        while (isspace(pp[-1])) pp--;
633        *pp = 0;
634    
635        f = fopen((char *)p, "rb");
636        if (f == NULL)
637          {
638          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
639          continue;
640          }
641    
642        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
643    
644        true_size =
645          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
646        true_study_size =
647          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
648    
649        re = (real_pcre *)new_malloc(true_size);
650        regex_gotten_store = gotten_store;
651    
652        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
653    
654        magic = ((real_pcre *)re)->magic_number;
655        if (magic != MAGIC_NUMBER)
656          {
657          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
658            {
659            do_flip = 1;
660            }
661          else
662            {
663            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
664            fclose(f);
665            continue;
666            }
667          }
668    
669        fprintf(outfile, "Compiled regex%s loaded from %s\n",
670          do_flip? " (byte-inverted)" : "", p);
671    
672        /* Need to know if UTF-8 for printing data strings */
673    
674        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
675        use_utf8 = (options & PCRE_UTF8) != 0;
676    
677        /* Now see if there is any following study data */
678    
679        if (true_study_size != 0)
680          {
681          pcre_study_data *psd;
682    
683          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
684          extra->flags = PCRE_EXTRA_STUDY_DATA;
685    
686          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
687          extra->study_data = psd;
688    
689          if (fread(psd, 1, true_study_size, f) != true_study_size)
690            {
691            FAIL_READ:
692            fprintf(outfile, "Failed to read data from %s\n", p);
693            if (extra != NULL) new_free(extra);
694            if (re != NULL) new_free(re);
695            fclose(f);
696            continue;
697            }
698          fprintf(outfile, "Study data loaded from %s\n", p);
699          do_study = 1;     /* To get the data output if requested */
700          }
701        else fprintf(outfile, "No study data\n");
702    
703        fclose(f);
704        goto SHOW_INFO;
705        }
706    
707      /* In-line pattern (the usual case). Get the delimiter and seek the end of
708    the pattern; if it isn't complete, read more. */
709    
710    delimiter = *p++;    delimiter = *p++;
711    
# Line 549  while (!done) Line 727  while (!done)
727        }        }
728      if (*pp != 0) break;      if (*pp != 0) break;
729    
730      len = sizeof(buffer) - (pp - buffer);      len = BUFFER_SIZE - (pp - buffer);
731      if (len < 256)      if (len < 256)
732        {        {
733        fprintf(outfile, "** Expression too long - missing delimiter?\n");        fprintf(outfile, "** Expression too long - missing delimiter?\n");
# Line 572  while (!done) Line 750  while (!done)
750    
751    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
752    
753    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
754      for callouts. */
755    
756    *pp++ = 0;    *pp++ = 0;
757      strcpy((char *)pbuffer, (char *)p);
758    
759    /* Look for options after final delimiter */    /* Look for options after final delimiter */
760    
# Line 586  while (!done) Line 766  while (!done)
766      {      {
767      switch (*pp++)      switch (*pp++)
768        {        {
769          case 'f': options |= PCRE_FIRSTLINE; break;
770        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
771        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
772        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 594  while (!done) Line 775  while (!done)
775    
776        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
777        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
778          case 'C': options |= PCRE_AUTO_CALLOUT; break;
779        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
780        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
781          case 'F': do_flip = 1; break;
782        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
783        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
784        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
# Line 609  while (!done) Line 792  while (!done)
792        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
793        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
794        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
795          case '?': options |= PCRE_NO_UTF8_CHECK; break;
796    
797        case 'L':        case 'L':
798        ppp = pp;        ppp = pp;
799        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows */
800          while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
801        *ppp = 0;        *ppp = 0;
802        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
803          {          {
# Line 623  while (!done) Line 808  while (!done)
808        pp = ppp;        pp = ppp;
809        break;        break;
810    
811        case '\n': case ' ': break;        case '>':
812          to_file = pp;
813          while (*pp != 0) pp++;
814          while (isspace(pp[-1])) pp--;
815          *pp = 0;
816          break;
817    
818          case '\r':                      /* So that it works in Windows */
819          case '\n':
820          case ' ':
821          break;
822    
823        default:        default:
824        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
825        goto SKIP_DATA;        goto SKIP_DATA;
# Line 639  while (!done) Line 835  while (!done)
835      {      {
836      int rc;      int rc;
837      int cflags = 0;      int cflags = 0;
838    
839      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
840      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
841        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
842      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
843    
844      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 648  while (!done) Line 846  while (!done)
846    
847      if (rc != 0)      if (rc != 0)
848        {        {
849        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
850        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
851        goto SKIP_DATA;        goto SKIP_DATA;
852        }        }
# Line 689  while (!done) Line 887  while (!done)
887          {          {
888          for (;;)          for (;;)
889            {            {
890            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
891              {              {
892              done = 1;              done = 1;
893              goto CONTINUE;              goto CONTINUE;
# Line 713  while (!done) Line 911  while (!done)
911                sizeof(real_pcre) -                sizeof(real_pcre) -
912                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
913    
914        /* Extract the size for possible writing before possibly flipping it,
915        and remember the store that was got. */
916    
917        true_size = ((real_pcre *)re)->size;
918        regex_gotten_store = gotten_store;
919    
920        /* If /S was present, study the regexp to generate additional info to
921        help with the matching. */
922    
923        if (do_study)
924          {
925          if (timeit)
926            {
927            register int i;
928            clock_t time_taken;
929            clock_t start_time = clock();
930            for (i = 0; i < LOOPREPEAT; i++)
931              extra = pcre_study(re, study_options, &error);
932            time_taken = clock() - start_time;
933            if (extra != NULL) free(extra);
934            fprintf(outfile, "  Study time %.3f milliseconds\n",
935              (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
936                (double)CLOCKS_PER_SEC);
937            }
938          extra = pcre_study(re, study_options, &error);
939          if (error != NULL)
940            fprintf(outfile, "Failed to study: %s\n", error);
941          else if (extra != NULL)
942            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
943          }
944    
945        /* If the 'F' option was present, we flip the bytes of all the integer
946        fields in the regex data block and the study block. This is to make it
947        possible to test PCRE's handling of byte-flipped patterns, e.g. those
948        compiled on a different architecture. */
949    
950        if (do_flip)
951          {
952          real_pcre *rre = (real_pcre *)re;
953          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
954          rre->size = byteflip(rre->size, sizeof(rre->size));
955          rre->options = byteflip(rre->options, sizeof(rre->options));
956          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
957          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
958          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
959          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
960          rre->name_table_offset = byteflip(rre->name_table_offset,
961            sizeof(rre->name_table_offset));
962          rre->name_entry_size = byteflip(rre->name_entry_size,
963            sizeof(rre->name_entry_size));
964          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
965    
966          if (extra != NULL)
967            {
968            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
969            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
970            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
971            }
972          }
973    
974        /* Extract information from the compiled data if required */
975    
976        SHOW_INFO:
977    
978      if (do_showinfo)      if (do_showinfo)
979        {        {
980        unsigned long int get_options;        unsigned long int get_options, all_options;
981    #if !defined NOINFOCHECK
982        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
983    #endif
984        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char;
985        int nameentrysize, namecount;        int nameentrysize, namecount;
986        const uschar *nametable;        const uschar *nametable;
       size_t size;  
987    
988        if (do_debug)        if (do_debug)
989          {          {
990          fprintf(outfile, "------------------------------------------------------------------\n");          fprintf(outfile, "------------------------------------------------------------------\n");
991          print_internals(re, outfile);          _pcre_printint(re, outfile);
992          }          }
993    
994        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
# Line 738  while (!done) Line 1001  while (!done)
1001        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1002        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1003    
1004    #if !defined NOINFOCHECK
1005        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1006        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1007          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 755  while (!done) Line 1019  while (!done)
1019            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1020              get_options, old_options);              get_options, old_options);
1021          }          }
1022    #endif
1023    
1024        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1025          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1026          size, gotten_store);          (int)size, (int)regex_gotten_store);
1027    
1028        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1029        if (backrefmax > 0)        if (backrefmax > 0)
# Line 776  while (!done) Line 1041  while (!done)
1041            }            }
1042          }          }
1043    
1044          /* The NOPARTIAL bit is a private bit in the options, so we have
1045          to fish it out via our back door */
1046    
1047          all_options = ((real_pcre *)re)->options;
1048          if (do_flip)
1049            {
1050            all_options = byteflip(all_options, sizeof(all_options));
1051            }
1052    
1053          if ((all_options & PCRE_NOPARTIAL) != 0)
1054            fprintf(outfile, "Partial matching not supported\n");
1055    
1056        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1057          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",
1058            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1059            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1060            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1061            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1062              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1063            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1064            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1065            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1066            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1067            ((get_options & PCRE_UTF8) != 0)? " utf8" : "");            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1068              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1069    
1070        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1071          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Case state changes\n");
# Line 824  while (!done) Line 1103  while (!done)
1103          else          else
1104            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1105          }          }
       }  
1106    
1107      /* If /S was present, study the regexp to generate additional info to        /* Don't output study size; at present it is in any case a fixed
1108      help with the matching. */        value, but it varies, depending on the computer architecture, and
1109          so messes up the test suite. (And with the /F option, it might be
1110          flipped.) */
1111    
1112      if (do_study)        if (do_study)
       {  
       if (timeit)  
1113          {          {
1114          register int i;          if (extra == NULL)
1115          clock_t time_taken;            fprintf(outfile, "Study returned NULL\n");
1116          clock_t start_time = clock();          else
1117          for (i = 0; i < LOOPREPEAT; i++)            {
1118            extra = pcre_study(re, study_options, &error);            uschar *start_bits = NULL;
1119          time_taken = clock() - start_time;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1120          if (extra != NULL) free(extra);  
1121          fprintf(outfile, "  Study time %.3f milliseconds\n",            if (start_bits == NULL)
1122            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /              fprintf(outfile, "No starting byte set\n");
1123              (double)CLOCKS_PER_SEC);            else
1124                {
1125                int i;
1126                int c = 24;
1127                fprintf(outfile, "Starting byte set: ");
1128                for (i = 0; i < 256; i++)
1129                  {
1130                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1131                    {
1132                    if (c > 75)
1133                      {
1134                      fprintf(outfile, "\n  ");
1135                      c = 2;
1136                      }
1137                    if (isprint(i) && i != ' ')
1138                      {
1139                      fprintf(outfile, "%c ", i);
1140                      c += 2;
1141                      }
1142                    else
1143                      {
1144                      fprintf(outfile, "\\x%02x ", i);
1145                      c += 5;
1146                      }
1147                    }
1148                  }
1149                fprintf(outfile, "\n");
1150                }
1151              }
1152          }          }
1153          }
1154    
1155        extra = pcre_study(re, study_options, &error);      /* If the '>' option was present, we write out the regex to a file, and
1156        if (error != NULL)      that is all. The first 8 bytes of the file are the regex length and then
1157          fprintf(outfile, "Failed to study: %s\n", error);      the study length, in big-endian order. */
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
1158    
1159        else if (do_showinfo)      if (to_file != NULL)
1160          {
1161          FILE *f = fopen((char *)to_file, "wb");
1162          if (f == NULL)
1163          {          {
1164          size_t size;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1165          uschar *start_bits = NULL;          }
1166          new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);        else
1167          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          {
1168          fprintf(outfile, "Study size = %d\n", size);          uschar sbuf[8];
1169          if (start_bits == NULL)          sbuf[0] = (true_size >> 24)  & 255;
1170            fprintf(outfile, "No starting character set\n");          sbuf[1] = (true_size >> 16)  & 255;
1171            sbuf[2] = (true_size >>  8)  & 255;
1172            sbuf[3] = (true_size)  & 255;
1173    
1174            sbuf[4] = (true_study_size >> 24)  & 255;
1175            sbuf[5] = (true_study_size >> 16)  & 255;
1176            sbuf[6] = (true_study_size >>  8)  & 255;
1177            sbuf[7] = (true_study_size)  & 255;
1178    
1179            if (fwrite(sbuf, 1, 8, f) < 8 ||
1180                fwrite(re, 1, true_size, f) < true_size)
1181              {
1182              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1183              }
1184          else          else
1185            {            {
1186            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1187            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1188              {              {
1189              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1190                    true_study_size)
1191                {                {
1192                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1193                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1194                }                }
1195                else fprintf(outfile, "Study data written to %s\n", to_file);
1196              }              }
           fprintf(outfile, "\n");  
1197            }            }
1198            fclose(f);
1199          }          }
1200    
1201          new_free(re);
1202          if (extra != NULL) new_free(extra);
1203          if (tables != NULL) new_free((void *)tables);
1204          continue;  /* With next regex */
1205        }        }
1206      }      }        /* End of non-POSIX compile */
1207    
1208    /* Read data lines and test them */    /* Read data lines and test them */
1209    
# Line 910  while (!done) Line 1223  while (!done)
1223      int gmatched = 0;      int gmatched = 0;
1224      int start_offset = 0;      int start_offset = 0;
1225      int g_notempty = 0;      int g_notempty = 0;
1226        int use_dfa = 0;
1227    
1228      options = 0;      options = 0;
1229    
# Line 919  while (!done) Line 1233  while (!done)
1233      callout_count = 0;      callout_count = 0;
1234      callout_fail_count = 999999;      callout_fail_count = 999999;
1235      callout_fail_id = -1;      callout_fail_id = -1;
1236        show_malloc = 0;
1237    
1238      if (infile == stdin) printf("data> ");      if (infile == stdin) printf("data> ");
1239      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1240        {        {
1241        done = 1;        done = 1;
1242        goto CONTINUE;        goto CONTINUE;
# Line 964  while (!done) Line 1279  while (!done)
1279    
1280          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
1281    
1282    #if !defined NOUTF8
1283          if (*p == '{')          if (*p == '{')
1284            {            {
1285            unsigned char *pt = p;            unsigned char *pt = p;
# Line 974  while (!done) Line 1290  while (!done)
1290              {              {
1291              unsigned char buff8[8];              unsigned char buff8[8];
1292              int ii, utn;              int ii, utn;
1293              utn = ord2utf8(c, buff8);              utn = _pcre_ord2utf8(c, buff8);
1294              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1295              c = buff8[ii];   /* Last byte */              c = buff8[ii];   /* Last byte */
1296              p = pt + 1;              p = pt + 1;
# Line 982  while (!done) Line 1298  while (!done)
1298              }              }
1299            /* Not correct form; fall through */            /* Not correct form; fall through */
1300            }            }
1301    #endif
1302    
1303          /* Ordinary \x */          /* Ordinary \x */
1304    
# Line 993  while (!done) Line 1310  while (!done)
1310            }            }
1311          break;          break;
1312    
1313          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1314          p--;          p--;
1315          continue;          continue;
1316    
1317            case '>':
1318            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1319            continue;
1320    
1321          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1322          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1323          continue;          continue;
# Line 1058  while (!done) Line 1379  while (!done)
1379            }            }
1380          continue;          continue;
1381    
1382    #if !defined NODFA
1383            case 'D':
1384    #if !defined NOPOSIX
1385            if (posix || do_posix)
1386              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1387            else
1388    #endif
1389              use_dfa = 1;
1390            continue;
1391    
1392            case 'F':
1393            options |= PCRE_DFA_SHORTEST;
1394            continue;
1395    #endif
1396    
1397          case 'G':          case 'G':
1398          if (isdigit(*p))          if (isdigit(*p))
1399            {            {
# Line 1095  while (!done) Line 1431  while (!done)
1431            {            {
1432            size_offsets_max = n;            size_offsets_max = n;
1433            free(offsets);            free(offsets);
1434            use_offsets = offsets = malloc(size_offsets_max * sizeof(int));            use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1435            if (offsets == NULL)            if (offsets == NULL)
1436              {              {
1437              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1438                size_offsets_max * sizeof(int));                size_offsets_max * sizeof(int));
1439              return 1;              yield = 1;
1440                goto EXIT;
1441              }              }
1442            }            }
1443          use_size_offsets = n;          use_size_offsets = n;
1444          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1445          continue;          continue;
1446    
1447            case 'P':
1448            options |= PCRE_PARTIAL;
1449            continue;
1450    
1451    #if !defined NODFA
1452            case 'R':
1453            options |= PCRE_DFA_RESTART;
1454            continue;
1455    #endif
1456    
1457            case 'S':
1458            show_malloc = 1;
1459            continue;
1460    
1461          case 'Z':          case 'Z':
1462          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1463          continue;          continue;
1464    
1465            case '?':
1466            options |= PCRE_NO_UTF8_CHECK;
1467            continue;
1468          }          }
1469        *q++ = c;        *q++ = c;
1470        }        }
1471      *q = 0;      *q = 0;
1472      len = q - dbuffer;      len = q - dbuffer;
1473    
1474        if ((all_use_dfa || use_dfa) && find_match_limit)
1475          {
1476          printf("**Match limit not relevant for DFA matching: ignored\n");
1477          find_match_limit = 0;
1478          }
1479    
1480      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1481      support timing or playing with the match limit or callout data. */      support timing or playing with the match limit or callout data. */
1482    
# Line 1126  while (!done) Line 1487  while (!done)
1487        int eflags = 0;        int eflags = 0;
1488        regmatch_t *pmatch = NULL;        regmatch_t *pmatch = NULL;
1489        if (use_size_offsets > 0)        if (use_size_offsets > 0)
1490          pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1491        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1492        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1493    
# Line 1134  while (!done) Line 1495  while (!done)
1495    
1496        if (rc != 0)        if (rc != 0)
1497          {          {
1498          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1499          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1500          }          }
1501        else        else
# Line 1173  while (!done) Line 1534  while (!done)
1534          register int i;          register int i;
1535          clock_t time_taken;          clock_t time_taken;
1536          clock_t start_time = clock();          clock_t start_time = clock();
1537    
1538    #if !defined NODFA
1539            if (all_use_dfa || use_dfa)
1540              {
1541              int workspace[1000];
1542              for (i = 0; i < LOOPREPEAT; i++)
1543                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1544                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1545                  sizeof(workspace)/sizeof(int));
1546              }
1547            else
1548    #endif
1549    
1550          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1551            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1552              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1553    
1554          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1555          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1556            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
# Line 1193  while (!done) Line 1568  while (!done)
1568    
1569          if (extra == NULL)          if (extra == NULL)
1570            {            {
1571            extra = malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1572            extra->flags = 0;            extra->flags = 0;
1573            }            }
1574          extra->flags |= PCRE_EXTRA_MATCH_LIMIT;          extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
# Line 1209  while (!done) Line 1584  while (!done)
1584              min = mid;              min = mid;
1585              mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;              mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1586              }              }
1587            else if (count >= 0 || count == PCRE_ERROR_NOMATCH)            else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1588                                     count == PCRE_ERROR_PARTIAL)
1589              {              {
1590              if (mid == min + 1)              if (mid == min + 1)
1591                {                {
# Line 1232  while (!done) Line 1608  while (!done)
1608          {          {
1609          if (extra == NULL)          if (extra == NULL)
1610            {            {
1611            extra = malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1612            extra->flags = 0;            extra->flags = 0;
1613            }            }
1614          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1615          extra->callout_data = (void *)callout_data;          extra->callout_data = &callout_data;
1616          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1617            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
1618          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
# Line 1245  while (!done) Line 1621  while (!done)
1621        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
1622        value of match_limit. */        value of match_limit. */
1623    
1624        else count = pcre_exec(re, extra, (char *)bptr, len,  #if !defined NODFA
1625          start_offset, options | g_notempty, use_offsets, use_size_offsets);        else if (all_use_dfa || use_dfa)
1626            {
1627            int workspace[1000];
1628            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1629              options | g_notempty, use_offsets, use_size_offsets, workspace,
1630              sizeof(workspace)/sizeof(int));
1631            if (count == 0)
1632              {
1633              fprintf(outfile, "Matched, but too many subsidiary matches\n");
1634              count = use_size_offsets/2;
1635              }
1636            }
1637    #endif
1638    
1639        if (count == 0)        else
1640          {          {
1641          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
1642          count = use_size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
1643            if (count == 0)
1644              {
1645              fprintf(outfile, "Matched, but too many substrings\n");
1646              count = use_size_offsets/3;
1647              }
1648          }          }
1649    
1650        /* Matched */        /* Matched */
# Line 1333  while (!done) Line 1726  while (!done)
1726            }            }
1727          }          }
1728    
1729          /* There was a partial match */
1730    
1731          else if (count == PCRE_ERROR_PARTIAL)
1732            {
1733            fprintf(outfile, "Partial match");
1734    #if !defined NODFA
1735            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
1736              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
1737                bptr + use_offsets[0]);
1738    #endif
1739            fprintf(outfile, "\n");
1740            break;  /* Out of the /g loop */
1741            }
1742    
1743        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
1744        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end.
1745        We want to advance the start offset, and continue. Fudge the offset        We want to advance the start offset, and continue. In the case of UTF-8
1746        values to achieve this. We won't be at the end of the string - that        matching, the advance must be one character, not one byte. Fudge the
1747        was checked before setting g_notempty. */        offset values to achieve this. We won't be at the end of the string -
1748          that was checked before setting g_notempty. */
1749    
1750        else        else
1751          {          {
1752          if (g_notempty != 0)          if (g_notempty != 0)
1753            {            {
1754              int onechar = 1;
1755            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
1756            use_offsets[1] = start_offset + 1;            if (use_utf8)
1757                {
1758                while (start_offset + onechar < len)
1759                  {
1760                  int tb = bptr[start_offset+onechar];
1761                  if (tb <= 127) break;
1762                  tb &= 0xc0;
1763                  if (tb != 0 && tb != 0xc0) onechar++;
1764                  }
1765                }
1766              use_offsets[1] = start_offset + onechar;
1767            }            }
1768          else          else
1769            {            {
1770            if (gmatched == 0)   /* Error if no previous matches */            if (count == PCRE_ERROR_NOMATCH)
1771              {              {
1772              if (count == -1) fprintf(outfile, "No match\n");              if (gmatched == 0) fprintf(outfile, "No match\n");
               else fprintf(outfile, "Error %d\n", count);  
1773              }              }
1774              else fprintf(outfile, "Error %d\n", count);
1775            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
1776            }            }
1777          }          }
# Line 1395  while (!done) Line 1814  while (!done)
1814    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1815  #endif  #endif
1816    
1817    if (re != NULL) free(re);    if (re != NULL) new_free(re);
1818    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
1819    if (tables != NULL)    if (tables != NULL)
1820      {      {
1821      free((void *)tables);      new_free((void *)tables);
1822      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
1823      }      }
1824    }    }
1825    
1826  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
1827  return 0;  
1828    EXIT:
1829    
1830    if (infile != NULL && infile != stdin) fclose(infile);
1831    if (outfile != NULL && outfile != stdout) fclose(outfile);
1832    
1833    free(buffer);
1834    free(dbuffer);
1835    free(pbuffer);
1836    free(offsets);
1837    
1838    return yield;
1839  }  }
1840    
1841  /* End */  /* End of pcretest.c */

Legend:
Removed from v.67  
changed lines
  Added in v.79

  ViewVC Help
Powered by ViewVC 1.1.5