/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 67 by nigel, Sat Feb 24 21:40:13 2007 UTC revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather untidy in places. */  been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39  #include <ctype.h>  #include <ctype.h>
40  #include <stdio.h>  #include <stdio.h>
# Line 12  been extended and consequently is now ra Line 42  been extended and consequently is now ra
42  #include <stdlib.h>  #include <stdlib.h>
43  #include <time.h>  #include <time.h>
44  #include <locale.h>  #include <locale.h>
45    #include <errno.h>
 /* We need the internal info for displaying the results of pcre_study(). Also  
 for getting the opcodes for showing compiled code. */  
46    
47  #define PCRE_SPY        /* For Win32 build, import data, not export */  #define PCRE_SPY        /* For Win32 build, import data, not export */
48  #include "internal.h"  
49    /* We need the internal info for displaying the results of pcre_study() and
50    other internal data; pcretest also uses some of the fixed tables, and generally
51    has "inside information" compared to a program that strictly follows the PCRE
52    API. */
53    
54    #include "pcre_internal.h"
55    
56    
57  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
58  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 35  Makefile. */ Line 70  Makefile. */
70  #endif  #endif
71  #endif  #endif
72    
73  #define LOOPREPEAT 50000  #define LOOPREPEAT 500000
74    
75    #define BUFFER_SIZE 30000
76    #define PBUFFER_SIZE BUFFER_SIZE
77    #define DBUFFER_SIZE BUFFER_SIZE
78    
79    
80  static FILE *outfile;  static FILE *outfile;
# Line 45  static int callout_extra; Line 84  static int callout_extra;
84  static int callout_fail_count;  static int callout_fail_count;
85  static int callout_fail_id;  static int callout_fail_id;
86  static int first_callout;  static int first_callout;
87    static int show_malloc;
88  static int use_utf8;  static int use_utf8;
89  static size_t gotten_store;  static size_t gotten_store;
90    
91    static uschar *pbuffer = NULL;
   
 static int utf8_table1[] = {  
   0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  
   
 static int utf8_table2[] = {  
   0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  
   
 static int utf8_table3[] = {  
   0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  
   
   
   
 /*************************************************  
 *         Print compiled regex                   *  
 *************************************************/  
   
 /* The code for doing this is held in a separate file that is also included in  
 pcre.c when it is compiled with the debug switch. It defines a function called  
 print_internals(), which uses a table of opcode lengths defined by the macro  
 OP_LENGTHS, whose name must be OP_lengths. */  
   
 static uschar OP_lengths[] = { OP_LENGTHS };  
   
 #include "printint.c"  
92    
93    
94    
# Line 103  return(result); Line 119  return(result);
119    
120    
121    
 /*************************************************  
 *       Convert character value to UTF-8         *  
 *************************************************/  
   
 /* This function takes an integer value in the range 0 - 0x7fffffff  
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   buffer     pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
 */  
   
 static int  
 ord2utf8(int cvalue, unsigned char *buffer)  
 {  
 register int i, j;  
 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  
   if (cvalue <= utf8_table1[i]) break;  
 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  
 if (cvalue < 0) return -1;  
   
 buffer += i;  
 for (j = i; j > 0; j--)  
  {  
  *buffer-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *buffer = utf8_table2[i] | cvalue;  
 return i + 1;  
 }  
   
122    
123  /*************************************************  /*************************************************
124  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
# Line 174  if (i == 0 || i == 6) return 0;        / Line 154  if (i == 0 || i == 6) return 0;        /
154  /* i now has a value in the range 1-5 */  /* i now has a value in the range 1-5 */
155    
156  s = 6*i;  s = 6*i;
157  d = (c & utf8_table3[i]) << s;  d = (c & _pcre_utf8_table3[i]) << s;
158    
159  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
160    {    {
# Line 186  for (j = 0; j < i; j++) Line 166  for (j = 0; j < i; j++)
166    
167  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
168    
169  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < _pcre_utf8_table1_size; j++)
170    if (d <= utf8_table1[j]) break;    if (d <= _pcre_utf8_table1[j]) break;
171  if (j != i) return -(i+1);  if (j != i) return -(i+1);
172    
173  /* Valid value */  /* Valid value */
# Line 266  data is not zero. */ Line 246  data is not zero. */
246  static int callout(pcre_callout_block *cb)  static int callout(pcre_callout_block *cb)
247  {  {
248  FILE *f = (first_callout | callout_extra)? outfile : NULL;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
249  int i, pre_start, post_start;  int i, pre_start, post_start, subject_length;
250    
251  if (callout_extra)  if (callout_extra)
252    {    {
# Line 297  pre_start = pchars((unsigned char *)cb-> Line 277  pre_start = pchars((unsigned char *)cb->
277  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
278    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
279    
280    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
281    
282  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  (void)pchars((unsigned char *)(cb->subject + cb->current_position),
283    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
284    
285  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
286    
287  /* Always print appropriate indicators, with callout number if not already  /* Always print appropriate indicators, with callout number if not already
288  shown */  shown. For automatic callouts, show the pattern offset. */
289    
290  if (callout_extra) fprintf(outfile, "    ");  if (cb->callout_number == 255)
291    else fprintf(outfile, "%3d ", cb->callout_number);    {
292      fprintf(outfile, "%+3d ", cb->pattern_position);
293      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
294      }
295    else
296      {
297      if (callout_extra) fprintf(outfile, "    ");
298        else fprintf(outfile, "%3d ", cb->callout_number);
299      }
300    
301  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
302  fprintf(outfile, "^");  fprintf(outfile, "^");
# Line 317  if (post_start > 0) Line 307  if (post_start > 0)
307    fprintf(outfile, "^");    fprintf(outfile, "^");
308    }    }
309    
310  fprintf(outfile, "\n");  for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
311      fprintf(outfile, " ");
312    
313    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
314      pbuffer + cb->pattern_position);
315    
316    fprintf(outfile, "\n");
317  first_callout = 0;  first_callout = 0;
318    
319  if ((int)(cb->callout_data) != 0)  if (cb->callout_data != NULL)
320    {    {
321    fprintf(outfile, "Callout data = %d\n", (int)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
322    return (int)(cb->callout_data);    if (callout_data != 0)
323        {
324        fprintf(outfile, "Callout data = %d\n", callout_data);
325        return callout_data;
326        }
327    }    }
328    
329  return (cb->callout_number != callout_fail_id)? 0 :  return (cb->callout_number != callout_fail_id)? 0 :
# Line 333  return (cb->callout_number != callout_fa Line 332  return (cb->callout_number != callout_fa
332    
333    
334  /*************************************************  /*************************************************
335  *            Local malloc function               *  *            Local malloc functions              *
336  *************************************************/  *************************************************/
337    
338  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
# Line 341  compiled re. */ Line 340  compiled re. */
340    
341  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
342  {  {
343    void *block = malloc(size);
344  gotten_store = size;  gotten_store = size;
345  return malloc(size);  if (show_malloc)
346      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
347    return block;
348    }
349    
350    static void new_free(void *block)
351    {
352    if (show_malloc)
353      fprintf(outfile, "free             %p\n", block);
354    free(block);
355  }  }
356    
357    
358    /* For recursion malloc/free, to test stacking calls */
359    
360    static void *stack_malloc(size_t size)
361    {
362    void *block = malloc(size);
363    if (show_malloc)
364      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
365    return block;
366    }
367    
368    static void stack_free(void *block)
369    {
370    if (show_malloc)
371      fprintf(outfile, "stack_free       %p\n", block);
372    free(block);
373    }
374    
375    
376  /*************************************************  /*************************************************
377  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
# Line 363  if ((rc = pcre_fullinfo(re, study, optio Line 389  if ((rc = pcre_fullinfo(re, study, optio
389    
390    
391  /*************************************************  /*************************************************
392    *         Byte flipping function                 *
393    *************************************************/
394    
395    static long int
396    byteflip(long int value, int n)
397    {
398    if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
399    return ((value & 0x000000ff) << 24) |
400           ((value & 0x0000ff00) <<  8) |
401           ((value & 0x00ff0000) >>  8) |
402           ((value & 0xff000000) >> 24);
403    }
404    
405    
406    
407    
408    /*************************************************
409  *                Main Program                    *  *                Main Program                    *
410  *************************************************/  *************************************************/
411    
# Line 381  int showinfo = 0; Line 424  int showinfo = 0;
424  int showstore = 0;  int showstore = 0;
425  int size_offsets = 45;  int size_offsets = 45;
426  int size_offsets_max;  int size_offsets_max;
427  int *offsets;  int *offsets = NULL;
428  #if !defined NOPOSIX  #if !defined NOPOSIX
429  int posix = 0;  int posix = 0;
430  #endif  #endif
431  int debug = 0;  int debug = 0;
432  int done = 0;  int done = 0;
433  unsigned char buffer[30000];  int all_use_dfa = 0;
434  unsigned char dbuffer[1024];  int yield = 0;
435    
436    unsigned char *buffer;
437    unsigned char *dbuffer;
438    
439  /* Static so that new_malloc can use it. */  /* Get buffers from malloc() so that Electric Fence will check their misuse
440    when I am debugging. */
441    
442    buffer = (unsigned char *)malloc(BUFFER_SIZE);
443    dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
444    pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
445    
446    /* The outfile variable is static so that new_malloc can use it. The _setmode()
447    stuff is some magic that I don't understand, but which apparently does good
448    things in Windows. It's related to line terminations.  */
449    
450    #if defined(_WIN32) || defined(WIN32)
451    _setmode( _fileno( stdout ), 0x8000 );
452    #endif  /* defined(_WIN32) || defined(WIN32) */
453    
454  outfile = stdout;  outfile = stdout;
455    
# Line 405  while (argc > 1 && argv[op][0] == '-') Line 464  while (argc > 1 && argv[op][0] == '-')
464    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
465    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
466    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
467      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
468    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
469        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
470          *endptr == 0))          *endptr == 0))
# Line 422  while (argc > 1 && argv[op][0] == '-') Line 482  while (argc > 1 && argv[op][0] == '-')
482      printf("Compiled with\n");      printf("Compiled with\n");
483      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
484      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
485        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
486        printf("  %sUnicode properties support\n", rc? "" : "No ");
487      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
488      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
489      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
# Line 430  while (argc > 1 && argv[op][0] == '-') Line 492  while (argc > 1 && argv[op][0] == '-')
492      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
493      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
494      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %d\n", rc);
495        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
496        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
497      exit(0);      exit(0);
498      }      }
499    else    else
# Line 437  while (argc > 1 && argv[op][0] == '-') Line 501  while (argc > 1 && argv[op][0] == '-')
501      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
502      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
503      printf("  -C     show PCRE compile-time options and exit\n");      printf("  -C     show PCRE compile-time options and exit\n");
504      printf("  -d     debug: show compiled code; implies -i\n"      printf("  -d     debug: show compiled code; implies -i\n");
505             "  -i     show information about compiled pattern\n"      printf("  -dfa   force DFA matching for all subjects\n");
506        printf("  -i     show information about compiled pattern\n"
507               "  -m     output memory used information\n"
508             "  -o <n> set size of offsets vector to <n>\n");             "  -o <n> set size of offsets vector to <n>\n");
509  #if !defined NOPOSIX  #if !defined NOPOSIX
510      printf("  -p     use POSIX interface\n");      printf("  -p     use POSIX interface\n");
511  #endif  #endif
512      printf("  -s     output store information\n"      printf("  -s     output store (memory) used information\n"
513             "  -t     time compilation and execution\n");             "  -t     time compilation and execution\n");
514      return 1;      yield = 1;
515        goto EXIT;
516      }      }
517    op++;    op++;
518    argc--;    argc--;
# Line 454  while (argc > 1 && argv[op][0] == '-') Line 521  while (argc > 1 && argv[op][0] == '-')
521  /* Get the store for the offsets vector, and remember what it was */  /* Get the store for the offsets vector, and remember what it was */
522    
523  size_offsets_max = size_offsets;  size_offsets_max = size_offsets;
524  offsets = malloc(size_offsets_max * sizeof(int));  offsets = (int *)malloc(size_offsets_max * sizeof(int));
525  if (offsets == NULL)  if (offsets == NULL)
526    {    {
527    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
528      size_offsets_max * sizeof(int));      size_offsets_max * sizeof(int));
529    return 1;    yield = 1;
530      goto EXIT;
531    }    }
532    
533  /* Sort out the input and output files */  /* Sort out the input and output files */
534    
535  if (argc > 1)  if (argc > 1)
536    {    {
537    infile = fopen(argv[op], "r");    infile = fopen(argv[op], "rb");
538    if (infile == NULL)    if (infile == NULL)
539      {      {
540      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
541      return 1;      yield = 1;
542        goto EXIT;
543      }      }
544    }    }
545    
546  if (argc > 2)  if (argc > 2)
547    {    {
548    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], "wb");
549    if (outfile == NULL)    if (outfile == NULL)
550      {      {
551      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
552      return 1;      yield = 1;
553        goto EXIT;
554      }      }
555    }    }
556    
557  /* Set alternative malloc function */  /* Set alternative malloc function */
558    
559  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
560    pcre_free = new_free;
561    pcre_stack_malloc = stack_malloc;
562    pcre_stack_free = stack_free;
563    
564  /* Heading line, then prompt for first regex if stdin */  /* Heading line, then prompt for first regex if stdin */
565    
# Line 506  while (!done) Line 579  while (!done)
579    
580    const char *error;    const char *error;
581    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
582      unsigned char *to_file = NULL;
583    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
584      unsigned long int true_size, true_study_size = 0;
585      size_t size, regex_gotten_store;
586    int do_study = 0;    int do_study = 0;
587    int do_debug = debug;    int do_debug = debug;
588    int do_G = 0;    int do_G = 0;
589    int do_g = 0;    int do_g = 0;
590    int do_showinfo = showinfo;    int do_showinfo = showinfo;
591    int do_showrest = 0;    int do_showrest = 0;
592      int do_flip = 0;
593    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
594    
595    use_utf8 = 0;    use_utf8 = 0;
596    
597    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
598    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
599    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
600    fflush(outfile);    fflush(outfile);
601    
# Line 526  while (!done) Line 603  while (!done)
603    while (isspace(*p)) p++;    while (isspace(*p)) p++;
604    if (*p == 0) continue;    if (*p == 0) continue;
605    
606    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
607    complete, read more. */  
608      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
609        {
610        unsigned long int magic;
611        uschar sbuf[8];
612        FILE *f;
613    
614        p++;
615        pp = p + (int)strlen((char *)p);
616        while (isspace(pp[-1])) pp--;
617        *pp = 0;
618    
619        f = fopen((char *)p, "rb");
620        if (f == NULL)
621          {
622          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
623          continue;
624          }
625    
626        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
627    
628        true_size =
629          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
630        true_study_size =
631          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
632    
633        re = (real_pcre *)new_malloc(true_size);
634        regex_gotten_store = gotten_store;
635    
636        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
637    
638        magic = ((real_pcre *)re)->magic_number;
639        if (magic != MAGIC_NUMBER)
640          {
641          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
642            {
643            do_flip = 1;
644            }
645          else
646            {
647            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
648            fclose(f);
649            continue;
650            }
651          }
652    
653        fprintf(outfile, "Compiled regex%s loaded from %s\n",
654          do_flip? " (byte-inverted)" : "", p);
655    
656        /* Need to know if UTF-8 for printing data strings */
657    
658        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
659        use_utf8 = (options & PCRE_UTF8) != 0;
660    
661        /* Now see if there is any following study data */
662    
663        if (true_study_size != 0)
664          {
665          pcre_study_data *psd;
666    
667          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
668          extra->flags = PCRE_EXTRA_STUDY_DATA;
669    
670          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
671          extra->study_data = psd;
672    
673          if (fread(psd, 1, true_study_size, f) != true_study_size)
674            {
675            FAIL_READ:
676            fprintf(outfile, "Failed to read data from %s\n", p);
677            if (extra != NULL) new_free(extra);
678            if (re != NULL) new_free(re);
679            fclose(f);
680            continue;
681            }
682          fprintf(outfile, "Study data loaded from %s\n", p);
683          do_study = 1;     /* To get the data output if requested */
684          }
685        else fprintf(outfile, "No study data\n");
686    
687        fclose(f);
688        goto SHOW_INFO;
689        }
690    
691      /* In-line pattern (the usual case). Get the delimiter and seek the end of
692      the pattern; if it isn't complete, read more. */
693    
694    delimiter = *p++;    delimiter = *p++;
695    
# Line 549  while (!done) Line 711  while (!done)
711        }        }
712      if (*pp != 0) break;      if (*pp != 0) break;
713    
714      len = sizeof(buffer) - (pp - buffer);      len = BUFFER_SIZE - (pp - buffer);
715      if (len < 256)      if (len < 256)
716        {        {
717        fprintf(outfile, "** Expression too long - missing delimiter?\n");        fprintf(outfile, "** Expression too long - missing delimiter?\n");
# Line 572  while (!done) Line 734  while (!done)
734    
735    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
736    
737    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
738      for callouts. */
739    
740    *pp++ = 0;    *pp++ = 0;
741      strcpy((char *)pbuffer, (char *)p);
742    
743    /* Look for options after final delimiter */    /* Look for options after final delimiter */
744    
# Line 586  while (!done) Line 750  while (!done)
750      {      {
751      switch (*pp++)      switch (*pp++)
752        {        {
753          case 'f': options |= PCRE_FIRSTLINE; break;
754        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
755        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
756        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 594  while (!done) Line 759  while (!done)
759    
760        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
761        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
762          case 'C': options |= PCRE_AUTO_CALLOUT; break;
763        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
764        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
765          case 'F': do_flip = 1; break;
766        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
767        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
768        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
# Line 609  while (!done) Line 776  while (!done)
776        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
777        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
778        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
779          case '?': options |= PCRE_NO_UTF8_CHECK; break;
780    
781        case 'L':        case 'L':
782        ppp = pp;        ppp = pp;
783        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows */
784          while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
785        *ppp = 0;        *ppp = 0;
786        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
787          {          {
# Line 623  while (!done) Line 792  while (!done)
792        pp = ppp;        pp = ppp;
793        break;        break;
794    
795        case '\n': case ' ': break;        case '>':
796          to_file = pp;
797          while (*pp != 0) pp++;
798          while (isspace(pp[-1])) pp--;
799          *pp = 0;
800          break;
801    
802          case '\r':                      /* So that it works in Windows */
803          case '\n':
804          case ' ':
805          break;
806    
807        default:        default:
808        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
809        goto SKIP_DATA;        goto SKIP_DATA;
# Line 639  while (!done) Line 819  while (!done)
819      {      {
820      int rc;      int rc;
821      int cflags = 0;      int cflags = 0;
822    
823      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
824      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
825        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
826      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
827    
828      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 648  while (!done) Line 830  while (!done)
830    
831      if (rc != 0)      if (rc != 0)
832        {        {
833        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
834        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
835        goto SKIP_DATA;        goto SKIP_DATA;
836        }        }
# Line 689  while (!done) Line 871  while (!done)
871          {          {
872          for (;;)          for (;;)
873            {            {
874            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
875              {              {
876              done = 1;              done = 1;
877              goto CONTINUE;              goto CONTINUE;
# Line 713  while (!done) Line 895  while (!done)
895                sizeof(real_pcre) -                sizeof(real_pcre) -
896                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
897    
898        /* Extract the size for possible writing before possibly flipping it,
899        and remember the store that was got. */
900    
901        true_size = ((real_pcre *)re)->size;
902        regex_gotten_store = gotten_store;
903    
904        /* If /S was present, study the regexp to generate additional info to
905        help with the matching. */
906    
907        if (do_study)
908          {
909          if (timeit)
910            {
911            register int i;
912            clock_t time_taken;
913            clock_t start_time = clock();
914            for (i = 0; i < LOOPREPEAT; i++)
915              extra = pcre_study(re, study_options, &error);
916            time_taken = clock() - start_time;
917            if (extra != NULL) free(extra);
918            fprintf(outfile, "  Study time %.3f milliseconds\n",
919              (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
920                (double)CLOCKS_PER_SEC);
921            }
922          extra = pcre_study(re, study_options, &error);
923          if (error != NULL)
924            fprintf(outfile, "Failed to study: %s\n", error);
925          else if (extra != NULL)
926            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
927          }
928    
929        /* If the 'F' option was present, we flip the bytes of all the integer
930        fields in the regex data block and the study block. This is to make it
931        possible to test PCRE's handling of byte-flipped patterns, e.g. those
932        compiled on a different architecture. */
933    
934        if (do_flip)
935          {
936          real_pcre *rre = (real_pcre *)re;
937          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
938          rre->size = byteflip(rre->size, sizeof(rre->size));
939          rre->options = byteflip(rre->options, sizeof(rre->options));
940          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
941          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
942          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
943          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
944          rre->name_table_offset = byteflip(rre->name_table_offset,
945            sizeof(rre->name_table_offset));
946          rre->name_entry_size = byteflip(rre->name_entry_size,
947            sizeof(rre->name_entry_size));
948          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
949    
950          if (extra != NULL)
951            {
952            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
953            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
954            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
955            }
956          }
957    
958        /* Extract information from the compiled data if required */
959    
960        SHOW_INFO:
961    
962      if (do_showinfo)      if (do_showinfo)
963        {        {
964        unsigned long int get_options;        unsigned long int get_options, all_options;
965        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
966        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char;
967        int nameentrysize, namecount;        int nameentrysize, namecount;
968        const uschar *nametable;        const uschar *nametable;
       size_t size;  
969    
970        if (do_debug)        if (do_debug)
971          {          {
972          fprintf(outfile, "------------------------------------------------------------------\n");          fprintf(outfile, "------------------------------------------------------------------\n");
973          print_internals(re, outfile);          _pcre_printint(re, outfile);
974          }          }
975    
976        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
# Line 756  while (!done) Line 1001  while (!done)
1001              get_options, old_options);              get_options, old_options);
1002          }          }
1003    
1004        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1005          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1006          size, gotten_store);          (int)size, (int)regex_gotten_store);
1007    
1008        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1009        if (backrefmax > 0)        if (backrefmax > 0)
# Line 776  while (!done) Line 1021  while (!done)
1021            }            }
1022          }          }
1023    
1024          /* The NOPARTIAL bit is a private bit in the options, so we have
1025          to fish it out via our back door */
1026    
1027          all_options = ((real_pcre *)re)->options;
1028          if (do_flip)
1029            {
1030            all_options = byteflip(all_options, sizeof(all_options));
1031            }
1032    
1033          if ((all_options & PCRE_NOPARTIAL) != 0)
1034            fprintf(outfile, "Partial matching not supported\n");
1035    
1036        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1037          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",
1038            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1039            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1040            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1041            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1042              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1043            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1044            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1045            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1046            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1047            ((get_options & PCRE_UTF8) != 0)? " utf8" : "");            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1048              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1049    
1050        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1051          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Case state changes\n");
# Line 824  while (!done) Line 1083  while (!done)
1083          else          else
1084            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1085          }          }
       }  
1086    
1087      /* If /S was present, study the regexp to generate additional info to        /* Don't output study size; at present it is in any case a fixed
1088      help with the matching. */        value, but it varies, depending on the computer architecture, and
1089          so messes up the test suite. (And with the /F option, it might be
1090          flipped.) */
1091    
1092      if (do_study)        if (do_study)
       {  
       if (timeit)  
1093          {          {
1094          register int i;          if (extra == NULL)
1095          clock_t time_taken;            fprintf(outfile, "Study returned NULL\n");
1096          clock_t start_time = clock();          else
1097          for (i = 0; i < LOOPREPEAT; i++)            {
1098            extra = pcre_study(re, study_options, &error);            uschar *start_bits = NULL;
1099          time_taken = clock() - start_time;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1100          if (extra != NULL) free(extra);  
1101          fprintf(outfile, "  Study time %.3f milliseconds\n",            if (start_bits == NULL)
1102            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /              fprintf(outfile, "No starting byte set\n");
1103              (double)CLOCKS_PER_SEC);            else
1104                {
1105                int i;
1106                int c = 24;
1107                fprintf(outfile, "Starting byte set: ");
1108                for (i = 0; i < 256; i++)
1109                  {
1110                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1111                    {
1112                    if (c > 75)
1113                      {
1114                      fprintf(outfile, "\n  ");
1115                      c = 2;
1116                      }
1117                    if (isprint(i) && i != ' ')
1118                      {
1119                      fprintf(outfile, "%c ", i);
1120                      c += 2;
1121                      }
1122                    else
1123                      {
1124                      fprintf(outfile, "\\x%02x ", i);
1125                      c += 5;
1126                      }
1127                    }
1128                  }
1129                fprintf(outfile, "\n");
1130                }
1131              }
1132          }          }
1133          }
1134    
1135        extra = pcre_study(re, study_options, &error);      /* If the '>' option was present, we write out the regex to a file, and
1136        if (error != NULL)      that is all. The first 8 bytes of the file are the regex length and then
1137          fprintf(outfile, "Failed to study: %s\n", error);      the study length, in big-endian order. */
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
1138    
1139        else if (do_showinfo)      if (to_file != NULL)
1140          {
1141          FILE *f = fopen((char *)to_file, "wb");
1142          if (f == NULL)
1143          {          {
1144          size_t size;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1145          uschar *start_bits = NULL;          }
1146          new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);        else
1147          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          {
1148          fprintf(outfile, "Study size = %d\n", size);          uschar sbuf[8];
1149          if (start_bits == NULL)          sbuf[0] = (true_size >> 24)  & 255;
1150            fprintf(outfile, "No starting character set\n");          sbuf[1] = (true_size >> 16)  & 255;
1151            sbuf[2] = (true_size >>  8)  & 255;
1152            sbuf[3] = (true_size)  & 255;
1153    
1154            sbuf[4] = (true_study_size >> 24)  & 255;
1155            sbuf[5] = (true_study_size >> 16)  & 255;
1156            sbuf[6] = (true_study_size >>  8)  & 255;
1157            sbuf[7] = (true_study_size)  & 255;
1158    
1159            if (fwrite(sbuf, 1, 8, f) < 8 ||
1160                fwrite(re, 1, true_size, f) < true_size)
1161              {
1162              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1163              }
1164          else          else
1165            {            {
1166            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1167            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1168              {              {
1169              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1170                    true_study_size)
1171                {                {
1172                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1173                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1174                }                }
1175                else fprintf(outfile, "Study data written to %s\n", to_file);
1176              }              }
           fprintf(outfile, "\n");  
1177            }            }
1178            fclose(f);
1179          }          }
1180    
1181          new_free(re);
1182          if (extra != NULL) new_free(extra);
1183          if (tables != NULL) new_free((void *)tables);
1184          continue;  /* With next regex */
1185        }        }
1186      }      }        /* End of non-POSIX compile */
1187    
1188    /* Read data lines and test them */    /* Read data lines and test them */
1189    
# Line 910  while (!done) Line 1203  while (!done)
1203      int gmatched = 0;      int gmatched = 0;
1204      int start_offset = 0;      int start_offset = 0;
1205      int g_notempty = 0;      int g_notempty = 0;
1206        int use_dfa = 0;
1207    
1208      options = 0;      options = 0;
1209    
# Line 919  while (!done) Line 1213  while (!done)
1213      callout_count = 0;      callout_count = 0;
1214      callout_fail_count = 999999;      callout_fail_count = 999999;
1215      callout_fail_id = -1;      callout_fail_id = -1;
1216        show_malloc = 0;
1217    
1218      if (infile == stdin) printf("data> ");      if (infile == stdin) printf("data> ");
1219      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1220        {        {
1221        done = 1;        done = 1;
1222        goto CONTINUE;        goto CONTINUE;
# Line 974  while (!done) Line 1269  while (!done)
1269              {              {
1270              unsigned char buff8[8];              unsigned char buff8[8];
1271              int ii, utn;              int ii, utn;
1272              utn = ord2utf8(c, buff8);              utn = _pcre_ord2utf8(c, buff8);
1273              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1274              c = buff8[ii];   /* Last byte */              c = buff8[ii];   /* Last byte */
1275              p = pt + 1;              p = pt + 1;
# Line 993  while (!done) Line 1288  while (!done)
1288            }            }
1289          break;          break;
1290    
1291          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1292          p--;          p--;
1293          continue;          continue;
1294    
1295            case '>':
1296            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1297            continue;
1298    
1299          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1300          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1301          continue;          continue;
# Line 1058  while (!done) Line 1357  while (!done)
1357            }            }
1358          continue;          continue;
1359    
1360            case 'D':
1361            if (posix || do_posix)
1362              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1363            else
1364              use_dfa = 1;
1365            continue;
1366    
1367            case 'F':
1368            options |= PCRE_DFA_SHORTEST;
1369            continue;
1370    
1371          case 'G':          case 'G':
1372          if (isdigit(*p))          if (isdigit(*p))
1373            {            {
# Line 1095  while (!done) Line 1405  while (!done)
1405            {            {
1406            size_offsets_max = n;            size_offsets_max = n;
1407            free(offsets);            free(offsets);
1408            use_offsets = offsets = malloc(size_offsets_max * sizeof(int));            use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1409            if (offsets == NULL)            if (offsets == NULL)
1410              {              {
1411              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1412                size_offsets_max * sizeof(int));                size_offsets_max * sizeof(int));
1413              return 1;              yield = 1;
1414                goto EXIT;
1415              }              }
1416            }            }
1417          use_size_offsets = n;          use_size_offsets = n;
1418          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1419          continue;          continue;
1420    
1421            case 'P':
1422            options |= PCRE_PARTIAL;
1423            continue;
1424    
1425            case 'R':
1426            options |= PCRE_DFA_RESTART;
1427            continue;
1428    
1429            case 'S':
1430            show_malloc = 1;
1431            continue;
1432    
1433          case 'Z':          case 'Z':
1434          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1435          continue;          continue;
1436    
1437            case '?':
1438            options |= PCRE_NO_UTF8_CHECK;
1439            continue;
1440          }          }
1441        *q++ = c;        *q++ = c;
1442        }        }
1443      *q = 0;      *q = 0;
1444      len = q - dbuffer;      len = q - dbuffer;
1445    
1446        if ((all_use_dfa || use_dfa) && find_match_limit)
1447          {
1448          printf("**Match limit not relevant for DFA matching: ignored\n");
1449          find_match_limit = 0;
1450          }
1451    
1452      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1453      support timing or playing with the match limit or callout data. */      support timing or playing with the match limit or callout data. */
1454    
# Line 1126  while (!done) Line 1459  while (!done)
1459        int eflags = 0;        int eflags = 0;
1460        regmatch_t *pmatch = NULL;        regmatch_t *pmatch = NULL;
1461        if (use_size_offsets > 0)        if (use_size_offsets > 0)
1462          pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1463        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1464        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1465    
# Line 1134  while (!done) Line 1467  while (!done)
1467    
1468        if (rc != 0)        if (rc != 0)
1469          {          {
1470          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1471          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1472          }          }
1473        else        else
# Line 1173  while (!done) Line 1506  while (!done)
1506          register int i;          register int i;
1507          clock_t time_taken;          clock_t time_taken;
1508          clock_t start_time = clock();          clock_t start_time = clock();
1509    
1510            if (all_use_dfa || use_dfa)
1511              {
1512              int workspace[1000];
1513              for (i = 0; i < LOOPREPEAT; i++)
1514                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1515                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1516                  sizeof(workspace)/sizeof(int));
1517              }
1518            else
1519    
1520          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1521            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1522              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1523    
1524          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1525          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1526            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
# Line 1193  while (!done) Line 1538  while (!done)
1538    
1539          if (extra == NULL)          if (extra == NULL)
1540            {            {
1541            extra = malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1542            extra->flags = 0;            extra->flags = 0;
1543            }            }
1544          extra->flags |= PCRE_EXTRA_MATCH_LIMIT;          extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
# Line 1209  while (!done) Line 1554  while (!done)
1554              min = mid;              min = mid;
1555              mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;              mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1556              }              }
1557            else if (count >= 0 || count == PCRE_ERROR_NOMATCH)            else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1558                                     count == PCRE_ERROR_PARTIAL)
1559              {              {
1560              if (mid == min + 1)              if (mid == min + 1)
1561                {                {
# Line 1232  while (!done) Line 1578  while (!done)
1578          {          {
1579          if (extra == NULL)          if (extra == NULL)
1580            {            {
1581            extra = malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1582            extra->flags = 0;            extra->flags = 0;
1583            }            }
1584          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1585          extra->callout_data = (void *)callout_data;          extra->callout_data = &callout_data;
1586          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1587            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
1588          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
# Line 1245  while (!done) Line 1591  while (!done)
1591        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
1592        value of match_limit. */        value of match_limit. */
1593    
1594        else count = pcre_exec(re, extra, (char *)bptr, len,        else if (all_use_dfa || use_dfa)
1595          start_offset, options | g_notempty, use_offsets, use_size_offsets);          {
1596            int workspace[1000];
1597            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1598              options | g_notempty, use_offsets, use_size_offsets, workspace,
1599              sizeof(workspace)/sizeof(int));
1600            if (count == 0)
1601              {
1602              fprintf(outfile, "Matched, but too many subsidiary matches\n");
1603              count = use_size_offsets/2;
1604              }
1605            }
1606    
1607        if (count == 0)        else
1608          {          {
1609          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
1610          count = use_size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
1611            if (count == 0)
1612              {
1613              fprintf(outfile, "Matched, but too many substrings\n");
1614              count = use_size_offsets/3;
1615              }
1616          }          }
1617    
1618        /* Matched */        /* Matched */
# Line 1333  while (!done) Line 1694  while (!done)
1694            }            }
1695          }          }
1696    
1697          /* There was a partial match */
1698    
1699          else if (count == PCRE_ERROR_PARTIAL)
1700            {
1701            fprintf(outfile, "Partial match");
1702            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
1703              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
1704                bptr + use_offsets[0]);
1705            fprintf(outfile, "\n");
1706            break;  /* Out of the /g loop */
1707            }
1708    
1709        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
1710        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end.
1711        We want to advance the start offset, and continue. Fudge the offset        We want to advance the start offset, and continue. In the case of UTF-8
1712        values to achieve this. We won't be at the end of the string - that        matching, the advance must be one character, not one byte. Fudge the
1713        was checked before setting g_notempty. */        offset values to achieve this. We won't be at the end of the string -
1714          that was checked before setting g_notempty. */
1715    
1716        else        else
1717          {          {
1718          if (g_notempty != 0)          if (g_notempty != 0)
1719            {            {
1720              int onechar = 1;
1721            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
1722            use_offsets[1] = start_offset + 1;            if (use_utf8)
1723                {
1724                while (start_offset + onechar < len)
1725                  {
1726                  int tb = bptr[start_offset+onechar];
1727                  if (tb <= 127) break;
1728                  tb &= 0xc0;
1729                  if (tb != 0 && tb != 0xc0) onechar++;
1730                  }
1731                }
1732              use_offsets[1] = start_offset + onechar;
1733            }            }
1734          else          else
1735            {            {
1736            if (gmatched == 0)   /* Error if no previous matches */            if (count == PCRE_ERROR_NOMATCH)
1737              {              {
1738              if (count == -1) fprintf(outfile, "No match\n");              if (gmatched == 0) fprintf(outfile, "No match\n");
               else fprintf(outfile, "Error %d\n", count);  
1739              }              }
1740              else fprintf(outfile, "Error %d\n", count);
1741            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
1742            }            }
1743          }          }
# Line 1395  while (!done) Line 1780  while (!done)
1780    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1781  #endif  #endif
1782    
1783    if (re != NULL) free(re);    if (re != NULL) new_free(re);
1784    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
1785    if (tables != NULL)    if (tables != NULL)
1786      {      {
1787      free((void *)tables);      new_free((void *)tables);
1788      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
1789      }      }
1790    }    }
1791    
1792  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
1793  return 0;  
1794    EXIT:
1795    
1796    if (infile != NULL && infile != stdin) fclose(infile);
1797    if (outfile != NULL && outfile != stdout) fclose(outfile);
1798    
1799    free(buffer);
1800    free(dbuffer);
1801    free(pbuffer);
1802    free(offsets);
1803    
1804    return yield;
1805  }  }
1806    
1807  /* End */  /* End of pcretest.c */

Legend:
Removed from v.67  
changed lines
  Added in v.77

  ViewVC Help
Powered by ViewVC 1.1.5