/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 7 by nigel, Sat Feb 24 21:38:09 2007 UTC revision 67 by nigel, Sat Feb 24 21:40:13 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather untidy in places. */
8    
9  #include <ctype.h>  #include <ctype.h>
10  #include <stdio.h>  #include <stdio.h>
11  #include <string.h>  #include <string.h>
12  #include <stdlib.h>  #include <stdlib.h>
13  #include <time.h>  #include <time.h>
14    #include <locale.h>
15    
16  /* Use the internal info for displaying the results of pcre_study(). */  /* We need the internal info for displaying the results of pcre_study(). Also
17    for getting the opcodes for showing compiled code. */
18    
19    #define PCRE_SPY        /* For Win32 build, import data, not export */
20  #include "internal.h"  #include "internal.h"
21    
22    /* It is possible to compile this test program without including support for
23    testing the POSIX interface, though this is not available via the standard
24    Makefile. */
25    
26    #if !defined NOPOSIX
27  #include "pcreposix.h"  #include "pcreposix.h"
28    #endif
29    
30  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
31  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 21  Line 35 
35  #endif  #endif
36  #endif  #endif
37    
38    #define LOOPREPEAT 50000
39    
40    
41  static FILE *outfile;  static FILE *outfile;
42  static int log_store = 0;  static int log_store = 0;
43    static int callout_count;
44    static int callout_extra;
45    static int callout_fail_count;
46    static int callout_fail_id;
47    static int first_callout;
48    static int use_utf8;
49    static size_t gotten_store;
50    
51    
52    
53    static int utf8_table1[] = {
54      0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
55    
56    static int utf8_table2[] = {
57      0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
58    
59    static int utf8_table3[] = {
60      0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
61    
62    
63    
64    /*************************************************
65    *         Print compiled regex                   *
66    *************************************************/
67    
68    /* The code for doing this is held in a separate file that is also included in
69    pcre.c when it is compiled with the debug switch. It defines a function called
70    print_internals(), which uses a table of opcode lengths defined by the macro
71    OP_LENGTHS; the table itself must be named OP_lengths. */
72    
73    static uschar OP_lengths[] = { OP_LENGTHS };
74    
75    #include "printint.c"
76    
77    
78    
79    /*************************************************
80    *          Read number from string               *
81    *************************************************/
82    
83    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
84    around with conditional compilation, just do the job by hand. It is only used
85    for unpicking the -o argument, so just keep it simple.
86    
87    Arguments:
88      str           string to be converted
89      endptr        where to put the end pointer
90    
91    Returns:        the converted value, as an int
92    */
93    
94    static int
95    get_value(unsigned char *str, unsigned char **endptr)
96    {
97    int result = 0;
98    while(*str != 0 && isspace(*str)) str++;
99    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
100    *endptr = str;
101    return(result);
102    }
103    
104    
105    
106  /* Debugging function to print the internal form of the regex. This is the same  /*************************************************
107  code as contained in pcre.c under the DEBUG macro. */  *       Convert character value to UTF-8         *
108    *************************************************/
109    
110  static const char *OP_names[] = {  /* This function takes an integer value in the range 0 - 0x7fffffff
111    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  and encodes it as a UTF-8 character in 0 to 6 bytes.
   "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",  
   "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",  
   "Brazero", "Braminzero", "Bra"  
 };  
112    
113    Arguments:
114      cvalue     the character value
115      buffer     pointer to buffer for result - at least 6 bytes long
116    
117    Returns:     number of characters placed in the buffer
118                 -1 if input character is negative
119                 0 if input character is positive but too big (only when
120                 int is longer than 32 bits)
121    */
122    
123  static void print_internals(pcre *re)  static int
124    ord2utf8(int cvalue, unsigned char *buffer)
125  {  {
126  unsigned char *code = ((real_pcre *)re)->code;  register int i, j;
127    for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
128      if (cvalue <= utf8_table1[i]) break;
129    if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
130    if (cvalue < 0) return -1;
131    
132    buffer += i;
133    for (j = i; j > 0; j--)
134     {
135     *buffer-- = 0x80 | (cvalue & 0x3f);
136     cvalue >>= 6;
137     }
138    *buffer = utf8_table2[i] | cvalue;
139    return i + 1;
140    }
141    
 printf("------------------------------------------------------------------\n");  
142    
143  for(;;)  /*************************************************
144    *            Convert UTF-8 string to value       *
145    *************************************************/
146    
147    /* This function takes one or more bytes that represent a UTF-8 character,
148    and returns the value of the character.
149    
150    Argument:
151      buffer   a pointer to the byte vector
152      vptr     a pointer to an int to receive the value
153    
154    Returns:   >  0 => the number of bytes consumed
155               -6 to 0 => malformed UTF-8 character at offset = (-return)
156    */
157    
158    static int
159    utf82ord(unsigned char *buffer, int *vptr)
160    {
161    int c = *buffer++;
162    int d = c;
163    int i, j, s;
164    
165    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
166    {    {
167    int c;    if ((d & 0x80) == 0) break;
168    int charlength;    d <<= 1;
169      }
170    
171    printf("%3d ", code - ((real_pcre *)re)->code);  if (i == -1) { *vptr = c; return 1; }  /* ascii character */
172    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
   if (*code >= OP_BRA)  
     {  
     printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     printf("    %s\n", OP_names[*code]);  
     printf("------------------------------------------------------------------\n");  
     return;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     printf("%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ONCE:  
     printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       printf("    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) printf("    %c", c);  
       else printf("    \\x%02x", c);  
     printf("%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) printf("    %c{", c);  
       else printf("    \\x%02x{", c);  
     if (*code != OP_EXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     printf("    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) printf("    [^%c]", c);  
       else printf("    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) printf("    [^%c]", c);  
       else printf("    [^\\x%02x]", c);  
     printf("%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) printf("    [^%c]{", c);  
       else printf("    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     printf("    \\%d", *(++code));  
     break;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
   
       code++;  
       printf("    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') printf("\\");  
           if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);  
           if (--j > i)  
             {  
             printf("-");  
             if (j == '-' || j == ']') printf("\\");  
             if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       printf("]");  
       code += 32;  
   
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         printf("%s", OP_names[*code]);  
         break;  
173    
174          case OP_CRRANGE:  /* i now has a value in the range 1-5 */
175          case OP_CRMINRANGE:  
176          min = (code[1] << 8) + code[2];  s = 6*i;
177          max = (code[3] << 8) + code[4];  d = (c & utf8_table3[i]) << s;
         if (max == 0) printf("{%d,}", min);  
         else printf("{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) printf("?");  
         code += 4;  
         break;  
178    
179          default:  for (j = 0; j < i; j++)
180          code--;    {
181      c = *buffer++;
182      if ((c & 0xc0) != 0x80) return -(j+1);
183      s -= 6;
184      d |= (c & 0x3f) << s;
185      }
186    
187    /* Check that encoding was the correct unique one */
188    
189    for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
190      if (d <= utf8_table1[j]) break;
191    if (j != i) return -(i+1);
192    
193    /* Valid value */
194    
195    *vptr = d;
196    return i+1;
197    }
198    
199    
200    
201    /*************************************************
202    *             Print character string             *
203    *************************************************/
204    
205    /* Character string printing function. Must handle UTF-8 strings in utf8
206    mode. Yields number of characters printed. If handed a NULL file, just counts
207    chars without printing. */
208    
209    static int pchars(unsigned char *p, int length, FILE *f)
210    {
211    int c;
212    int yield = 0;
213    
214    while (length-- > 0)
215      {
216      if (use_utf8)
217        {
218        int rc = utf82ord(p, &c);
219    
220        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
221          {
222          length -= rc - 1;
223          p += rc;
224          if (c < 256 && isprint(c))
225            {
226            if (f != NULL) fprintf(f, "%c", c);
227            yield++;
228          }          }
229          else
230            {
231            int n;
232            if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
233            yield += n;
234            }
235          continue;
236        }        }
237      break;      }
238    
239      /* Anything else is just a one-node item */     /* Not UTF-8, or malformed UTF-8  */
240    
241      default:    if (isprint(c = *(p++)))
242      printf("    %s", OP_names[*code]);      {
243      break;      if (f != NULL) fprintf(f, "%c", c);
244        yield++;
245        }
246      else
247        {
248        if (f != NULL) fprintf(f, "\\x%02x", c);
249        yield += 4;
250      }      }
   
   code++;  
   printf("\n");  
251    }    }
252    
253    return yield;
254  }  }
255    
256    
257    
258  /* Character string printing function. */  /*************************************************
259    *              Callout function                  *
260    *************************************************/
261    
262    /* Called from PCRE as a result of the (?C) item. We print out where we are in
263    the match. Yield the callout data if it is not zero; otherwise yield 1 once the
264    failing callout has been reached the fail count number of times, else zero. */
265    
266  static void pchars(unsigned char *p, int length)  static int callout(pcre_callout_block *cb)
267  {  {
268  int c;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
269  while (length-- > 0)  int i, pre_start, post_start;
270    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  
271      else fprintf(outfile, "\\x%02x", c);  if (callout_extra)
272      {
273      fprintf(f, "Callout %d: last capture = %d\n",
274        cb->callout_number, cb->capture_last);
275    
276      for (i = 0; i < cb->capture_top * 2; i += 2)
277        {
278        if (cb->offset_vector[i] < 0)
279          fprintf(f, "%2d: <unset>\n", i/2);
280        else
281          {
282          fprintf(f, "%2d: ", i/2);
283          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
284            cb->offset_vector[i+1] - cb->offset_vector[i], f);
285          fprintf(f, "\n");
286          }
287        }
288      }
289    
290    /* Re-print the subject in canonical form, the first time or if giving full
291    details. On subsequent calls in the same match, we use pchars just to find the
292    printed lengths of the substrings. */
293    
294    if (f != NULL) fprintf(f, "--->");
295    
296    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
297    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
298      cb->current_position - cb->start_match, f);
299    
300    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
301      cb->subject_length - cb->current_position, f);
302    
303    if (f != NULL) fprintf(f, "\n");
304    
305    /* Always print appropriate indicators, with callout number if not already
306    shown */
307    
308    if (callout_extra) fprintf(outfile, "    ");
309      else fprintf(outfile, "%3d ", cb->callout_number);
310    
311    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
312    fprintf(outfile, "^");
313    
314    if (post_start > 0)
315      {
316      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
317      fprintf(outfile, "^");
318      }
319    
320    fprintf(outfile, "\n");
321    
322    first_callout = 0;
323    
324    if ((int)(cb->callout_data) != 0)
325      {
326      fprintf(outfile, "Callout data = %d\n", (int)(cb->callout_data));
327      return (int)(cb->callout_data);
328      }
329    
330    return (cb->callout_number != callout_fail_id)? 0 :
331           (++callout_count >= callout_fail_count)? 1 : 0;
332  }  }
333    
334    
335    /*************************************************
336    *            Local malloc function               *
337    *************************************************/
338    
339  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
340  compiled re. */  compiled re. */
341    
342  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
343  {  {
344  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  gotten_store = size;
345  return malloc(size);  return malloc(size);
346  }  }
347    
348    
349    
350    /*************************************************
351    *          Call pcre_fullinfo()                  *
352    *************************************************/
353    
354    /* Get one piece of information from the pcre_fullinfo() function */
355    
356    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
357    {
358    int rc;
359    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
360      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
361    }
362    
363    
364    
365    /*************************************************
366    *                Main Program                    *
367    *************************************************/
368    
369  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
370  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
371  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 263  int study_options = 0; Line 378  int study_options = 0;
378  int op = 1;  int op = 1;
379  int timeit = 0;  int timeit = 0;
380  int showinfo = 0;  int showinfo = 0;
381    int showstore = 0;
382    int size_offsets = 45;
383    int size_offsets_max;
384    int *offsets;
385    #if !defined NOPOSIX
386  int posix = 0;  int posix = 0;
387    #endif
388  int debug = 0;  int debug = 0;
389    int done = 0;
390  unsigned char buffer[30000];  unsigned char buffer[30000];
391  unsigned char dbuffer[1024];  unsigned char dbuffer[1024];
392    
# Line 276  outfile = stdout; Line 398  outfile = stdout;
398    
399  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
400    {    {
401    if (strcmp(argv[op], "-s") == 0) log_store = 1;    unsigned char *endptr;
402    
403      if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
404        showstore = 1;
405    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
406    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
407    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
408      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
409          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
410            *endptr == 0))
411        {
412        op++;
413        argc--;
414        }
415    #if !defined NOPOSIX
416    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
417    #endif
418      else if (strcmp(argv[op], "-C") == 0)
419        {
420        int rc;
421        printf("PCRE version %s\n", pcre_version());
422        printf("Compiled with\n");
423        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
424        printf("  %sUTF-8 support\n", rc? "" : "No ");
425        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
426        printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
427        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
428        printf("  Internal link size = %d\n", rc);
429        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
430        printf("  POSIX malloc threshold = %d\n", rc);
431        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
432        printf("  Default match limit = %d\n", rc);
433        exit(0);
434        }
435    else    else
436      {      {
437      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
438        printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
439        printf("  -C     show PCRE compile-time options and exit\n");
440        printf("  -d     debug: show compiled code; implies -i\n"
441               "  -i     show information about compiled pattern\n"
442               "  -o <n> set size of offsets vector to <n>\n");
443    #if !defined NOPOSIX
444        printf("  -p     use POSIX interface\n");
445    #endif
446        printf("  -s     output store information\n"
447               "  -t     time compilation and execution\n");
448      return 1;      return 1;
449      }      }
450    op++;    op++;
451    argc--;    argc--;
452    }    }
453    
454    /* Get the store for the offsets vector, and remember what it was */
455    
456    size_offsets_max = size_offsets;
457    offsets = malloc(size_offsets_max * sizeof(int));
458    if (offsets == NULL)
459      {
460      printf("** Failed to get %d bytes of memory for offsets vector\n",
461        size_offsets_max * sizeof(int));
462      return 1;
463      }
464    
465  /* Sort out the input and output files */  /* Sort out the input and output files */
466    
467  if (argc > 1)  if (argc > 1)
# Line 316  if (argc > 2) Line 488  if (argc > 2)
488    
489  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
490    
491  /* Heading line, then prompt for first re if stdin */  /* Heading line, then prompt for first regex if stdin */
492    
 fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");  
493  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  fprintf(outfile, "PCRE version %s\n\n", pcre_version());
494    
495  /* Main loop */  /* Main loop */
496    
497  for (;;)  while (!done)
498    {    {
499    pcre *re = NULL;    pcre *re = NULL;
500    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
501    
502    #if !defined NOPOSIX  /* There are still compilers that require no indent */
503    regex_t preg;    regex_t preg;
504      int do_posix = 0;
505    #endif
506    
507    const char *error;    const char *error;
508    unsigned char *p, *pp;    unsigned char *p, *pp, *ppp;
509      const unsigned char *tables = NULL;
510    int do_study = 0;    int do_study = 0;
511    int do_debug = 0;    int do_debug = debug;
512    int do_posix = 0;    int do_G = 0;
513      int do_g = 0;
514      int do_showinfo = showinfo;
515      int do_showrest = 0;
516    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
517    
518      use_utf8 = 0;
519    
520    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
521    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
522    if (infile != stdin) fprintf(outfile, (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
523      fflush(outfile);
524    
525    p = buffer;    p = buffer;
526    while (isspace(*p)) p++;    while (isspace(*p)) p++;
# Line 348  for (;;) Line 531  for (;;)
531    
532    delimiter = *p++;    delimiter = *p++;
533    
534    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
535      {      {
536      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
537      goto SKIP_DATA;      goto SKIP_DATA;
538      }      }
539    
# Line 358  for (;;) Line 541  for (;;)
541    
542    for(;;)    for(;;)
543      {      {
544      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
545          {
546          if (*pp == '\\' && pp[1] != 0) pp++;
547            else if (*pp == delimiter) break;
548          pp++;
549          }
550      if (*pp != 0) break;      if (*pp != 0) break;
551    
552      len = sizeof(buffer) - (pp - buffer);      len = sizeof(buffer) - (pp - buffer);
# Line 372  for (;;) Line 560  for (;;)
560      if (fgets((char *)pp, len, infile) == NULL)      if (fgets((char *)pp, len, infile) == NULL)
561        {        {
562        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
563        goto END_OFF;        done = 1;
564          goto CONTINUE;
565        }        }
566      if (infile != stdin) fprintf(outfile, (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
567      }      }
568    
569      /* If the first character after the delimiter is backslash, make
570      the pattern end with backslash. This is purely to provide a way
571      of testing for the error message when a pattern ends with backslash. */
572    
573      if (pp[1] == '\\') *pp++ = '\\';
574    
575    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter */
576    
577    *pp++ = 0;    *pp++ = 0;
# Line 385  for (;;) Line 580  for (;;)
580    
581    options = 0;    options = 0;
582    study_options = 0;    study_options = 0;
583      log_store = showstore;  /* default from command line */
584    
585    while (*pp != 0)    while (*pp != 0)
586      {      {
587      switch (*pp++)      switch (*pp++)
588        {        {
589          case 'g': do_g = 1; break;
590        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
591        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
592        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
593        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
594    
595          case '+': do_showrest = 1; break;
596        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
597        case 'D': do_debug = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
598        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
599          case 'G': do_G = 1; break;
600          case 'I': do_showinfo = 1; break;
601          case 'M': log_store = 1; break;
602          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
603    
604    #if !defined NOPOSIX
605        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
606    #endif
607    
608        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
609        case 'I': study_options |= PCRE_CASELESS; break;        case 'U': options |= PCRE_UNGREEDY; break;
610        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
611          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
612    
613          case 'L':
614          ppp = pp;
615          while (*ppp != '\n' && *ppp != ' ') ppp++;
616          *ppp = 0;
617          if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
618            {
619            fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
620            goto SKIP_DATA;
621            }
622          tables = pcre_maketables();
623          pp = ppp;
624          break;
625    
626        case '\n': case ' ': break;        case '\n': case ' ': break;
627        default:        default:
628        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
# Line 407  for (;;) Line 630  for (;;)
630        }        }
631      }      }
632    
633    /* Handle compiing via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
634    timing, showing, or debugging options. */    timing, showing, or debugging options, nor the ability to pass over
635      local character tables. */
636    
637    #if !defined NOPOSIX
638    if (posix || do_posix)    if (posix || do_posix)
639      {      {
640      int rc;      int rc;
# Line 432  for (;;) Line 657  for (;;)
657    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
658    
659    else    else
660    #endif  /* !defined NOPOSIX */
661    
662      {      {
663      if (timeit)      if (timeit)
664        {        {
665        register int i;        register int i;
666        clock_t time_taken;        clock_t time_taken;
667        clock_t start_time = clock();        clock_t start_time = clock();
668        for (i = 0; i < 4000; i++)        for (i = 0; i < LOOPREPEAT; i++)
669          {          {
670          re = pcre_compile((char *)p, options, &error, &erroroffset);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
671          if (re != NULL) free(re);          if (re != NULL) free(re);
672          }          }
673        time_taken = clock() - start_time;        time_taken = clock() - start_time;
674        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
675          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
676              (double)CLOCKS_PER_SEC);
677        }        }
678    
679      re = pcre_compile((char *)p, options, &error, &erroroffset);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
680    
681      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
682      if non-interactive. */      if non-interactive. */
# Line 462  for (;;) Line 690  for (;;)
690          for (;;)          for (;;)
691            {            {
692            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
693              goto END_OFF;              {
694                done = 1;
695                goto CONTINUE;
696                }
697            len = (int)strlen((char *)buffer);            len = (int)strlen((char *)buffer);
698            while (len > 0 && isspace(buffer[len-1])) len--;            while (len > 0 && isspace(buffer[len-1])) len--;
699            if (len == 0) break;            if (len == 0) break;
700            }            }
701          fprintf(outfile, "\n");          fprintf(outfile, "\n");
702          }          }
703        continue;        goto CONTINUE;
704        }        }
705    
706      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
707        info-returning functions. The old one has a limited interface and
708        returns only limited data. Check that it agrees with the newer one. */
709    
710        if (log_store)
711          fprintf(outfile, "Memory allocation (code space): %d\n",
712            (int)(gotten_store -
713                  sizeof(real_pcre) -
714                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
715    
716      if (showinfo || do_debug)      if (do_showinfo)
717        {        {
718        int first_char, count;        unsigned long int get_options;
719          int old_first_char, old_options, old_count;
720          int count, backrefmax, first_char, need_char;
721          int nameentrysize, namecount;
722          const uschar *nametable;
723          size_t size;
724    
725        if (debug || do_debug) print_internals(re);        if (do_debug)
726            {
727            fprintf(outfile, "------------------------------------------------------------------\n");
728            print_internals(re, outfile);
729            }
730    
731          new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
732          new_info(re, NULL, PCRE_INFO_SIZE, &size);
733          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
734          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
735          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
736          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
737          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
738          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
739          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
740    
741        count = pcre_info(re, &options, &first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
742        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
743          "Error %d while reading info\n", count);          "Error %d from pcre_info()\n", count);
744        else        else
745          {          {
746          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (old_count != count) fprintf(outfile,
747          if (options == 0) fprintf(outfile, "No options\n");            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
748            else fprintf(outfile, "Options:%s%s%s%s%s%s%s\n",              old_count);
749              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
750              ((options & PCRE_CASELESS) != 0)? " caseless" : "",          if (old_first_char != first_char) fprintf(outfile,
751              ((options & PCRE_EXTENDED) != 0)? " extended" : "",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
752              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",              first_char, old_first_char);
753              ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
754              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",          if (old_options != (int)get_options) fprintf(outfile,
755              ((options & PCRE_EXTRA) != 0)? " extra" : "");            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
756          if (first_char == -1)              get_options, old_options);
757            {          }
758            fprintf(outfile, "First char at start or follows \\n\n");  
759            }        if (size != gotten_store) fprintf(outfile,
760          else if (first_char < 0)          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
761            size, gotten_store);
762    
763          fprintf(outfile, "Capturing subpattern count = %d\n", count);
764          if (backrefmax > 0)
765            fprintf(outfile, "Max back reference = %d\n", backrefmax);
766    
767          if (namecount > 0)
768            {
769            fprintf(outfile, "Named capturing subpatterns:\n");
770            while (namecount-- > 0)
771            {            {
772            fprintf(outfile, "No first char\n");            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
773                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
774                GET2(nametable, 0));
775              nametable += nameentrysize;
776            }            }
777            }
778    
779          if (get_options == 0) fprintf(outfile, "No options\n");
780            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
781              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
782              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
783              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
784              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
785              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
786              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
787              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
788              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
789              ((get_options & PCRE_UTF8) != 0)? " utf8" : "");
790    
791          if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
792            fprintf(outfile, "Case state changes\n");
793    
794          if (first_char == -1)
795            {
796            fprintf(outfile, "First char at start or follows \\n\n");
797            }
798          else if (first_char < 0)
799            {
800            fprintf(outfile, "No first char\n");
801            }
802          else
803            {
804            int ch = first_char & 255;
805            const char *caseless = ((first_char & REQ_CASELESS) == 0)?
806              "" : " (caseless)";
807            if (isprint(ch))
808              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
809          else          else
810            {            fprintf(outfile, "First char = %d%s\n", ch, caseless);
811            if (isprint(first_char))          }
812              fprintf(outfile, "First char = \'%c\'\n", first_char);  
813            else        if (need_char < 0)
814              fprintf(outfile, "First char = %d\n", first_char);          {
815            }          fprintf(outfile, "No need char\n");
816            }
817          else
818            {
819            int ch = need_char & 255;
820            const char *caseless = ((need_char & REQ_CASELESS) == 0)?
821              "" : " (caseless)";
822            if (isprint(ch))
823              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
824            else
825              fprintf(outfile, "Need char = %d%s\n", ch, caseless);
826          }          }
827        }        }
828    
# Line 523  for (;;) Line 836  for (;;)
836          register int i;          register int i;
837          clock_t time_taken;          clock_t time_taken;
838          clock_t start_time = clock();          clock_t start_time = clock();
839          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
840            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
841          time_taken = clock() - start_time;          time_taken = clock() - start_time;
842          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
843          fprintf(outfile, "  Study time %.2f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
844            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
845                (double)CLOCKS_PER_SEC);
846          }          }
847    
848        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 537  for (;;) Line 851  for (;;)
851        else if (extra == NULL)        else if (extra == NULL)
852          fprintf(outfile, "Study returned NULL\n");          fprintf(outfile, "Study returned NULL\n");
853    
854        /* This looks at internal information. A bit kludgy to do it this        else if (do_showinfo)
       way, but it is useful for testing. */  
   
       else if (showinfo || do_debug)  
855          {          {
856          real_pcre_extra *xx = (real_pcre_extra *)extra;          size_t size;
857          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          uschar *start_bits = NULL;
858            new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);
859            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
860            fprintf(outfile, "Study size = %d\n", size);
861            if (start_bits == NULL)
862            fprintf(outfile, "No starting character set\n");            fprintf(outfile, "No starting character set\n");
863          else          else
864            {            {
# Line 552  for (;;) Line 867  for (;;)
867            fprintf(outfile, "Starting character set: ");            fprintf(outfile, "Starting character set: ");
868            for (i = 0; i < 256; i++)            for (i = 0; i < 256; i++)
869              {              {
870              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if ((start_bits[i/8] & (1<<(i%8))) != 0)
871                {                {
872                if (c > 75)                if (c > 75)
873                  {                  {
# Line 581  for (;;) Line 896  for (;;)
896    
897    for (;;)    for (;;)
898      {      {
899      unsigned char *pp;      unsigned char *q;
900        unsigned char *bptr = dbuffer;
901        int *use_offsets = offsets;
902        int use_size_offsets = size_offsets;
903        int callout_data = 0;
904        int callout_data_set = 0;
905      int count, c;      int count, c;
906      int offsets[30];      int copystrings = 0;
907      int size_offsets = sizeof(offsets)/sizeof(int);      int find_match_limit = 0;
908        int getstrings = 0;
909        int getlist = 0;
910        int gmatched = 0;
911        int start_offset = 0;
912        int g_notempty = 0;
913    
914      options = 0;      options = 0;
915    
916      if (infile == stdin) printf("  data> ");      pcre_callout = callout;
917      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) goto END_OFF;      first_callout = 1;
918      if (infile != stdin) fprintf(outfile, (char *)buffer);      callout_extra = 0;
919        callout_count = 0;
920        callout_fail_count = 999999;
921        callout_fail_id = -1;
922    
923        if (infile == stdin) printf("data> ");
924        if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
925          {
926          done = 1;
927          goto CONTINUE;
928          }
929        if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
930    
931      len = (int)strlen((char *)buffer);      len = (int)strlen((char *)buffer);
932      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
# Line 600  for (;;) Line 936  for (;;)
936      p = buffer;      p = buffer;
937      while (isspace(*p)) p++;      while (isspace(*p)) p++;
938    
939      pp = dbuffer;      q = dbuffer;
940      while ((c = *p++) != 0)      while ((c = *p++) != 0)
941        {        {
942        int i = 0;        int i = 0;
943        int n = 0;        int n = 0;
944    
945        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
946          {          {
947          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 624  for (;;) Line 961  for (;;)
961          break;          break;
962    
963          case 'x':          case 'x':
964    
965            /* Handle \x{..} specially - new Perl thing for utf8 */
966    
967            if (*p == '{')
968              {
969              unsigned char *pt = p;
970              c = 0;
971              while (isxdigit(*(++pt)))
972                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
973              if (*pt == '}')
974                {
975                unsigned char buff8[8];
976                int ii, utn;
977                utn = ord2utf8(c, buff8);
978                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
979                c = buff8[ii];   /* Last byte */
980                p = pt + 1;
981                break;
982                }
983              /* Not correct form; fall through */
984              }
985    
986            /* Ordinary \x */
987    
988          c = 0;          c = 0;
989          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
990            {            {
# Line 644  for (;;) Line 1005  for (;;)
1005          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
1006          continue;          continue;
1007    
1008          case 'E':          case 'C':
1009          options |= PCRE_DOLLAR_ENDONLY;          if (isdigit(*p))    /* Set copy string */
1010              {
1011              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1012              copystrings |= 1 << n;
1013              }
1014            else if (isalnum(*p))
1015              {
1016              uschar name[256];
1017              uschar *npp = name;
1018              while (isalnum(*p)) *npp++ = *p++;
1019              *npp = 0;
1020              n = pcre_get_stringnumber(re, (char *)name);
1021              if (n < 0)
1022                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1023              else copystrings |= 1 << n;
1024              }
1025            else if (*p == '+')
1026              {
1027              callout_extra = 1;
1028              p++;
1029              }
1030            else if (*p == '-')
1031              {
1032              pcre_callout = NULL;
1033              p++;
1034              }
1035            else if (*p == '!')
1036              {
1037              callout_fail_id = 0;
1038              p++;
1039              while(isdigit(*p))
1040                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1041              callout_fail_count = 0;
1042              if (*p == '!')
1043                {
1044                p++;
1045                while(isdigit(*p))
1046                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1047                }
1048              }
1049            else if (*p == '*')
1050              {
1051              int sign = 1;
1052              callout_data = 0;
1053              if (*(++p) == '-') { sign = -1; p++; }
1054              while(isdigit(*p))
1055                callout_data = callout_data * 10 + *p++ - '0';
1056              callout_data *= sign;
1057              callout_data_set = 1;
1058              }
1059          continue;          continue;
1060    
1061          case 'I':          case 'G':
1062          options |= PCRE_CASELESS;          if (isdigit(*p))
1063              {
1064              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1065              getstrings |= 1 << n;
1066              }
1067            else if (isalnum(*p))
1068              {
1069              uschar name[256];
1070              uschar *npp = name;
1071              while (isalnum(*p)) *npp++ = *p++;
1072              *npp = 0;
1073              n = pcre_get_stringnumber(re, (char *)name);
1074              if (n < 0)
1075                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1076              else getstrings |= 1 << n;
1077              }
1078            continue;
1079    
1080            case 'L':
1081            getlist = 1;
1082          continue;          continue;
1083    
1084          case 'M':          case 'M':
1085          options |= PCRE_MULTILINE;          find_match_limit = 1;
1086          continue;          continue;
1087    
1088          case 'S':          case 'N':
1089          options |= PCRE_DOTALL;          options |= PCRE_NOTEMPTY;
1090          continue;          continue;
1091    
1092          case 'O':          case 'O':
1093          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1094          if (n <= (int)sizeof(offsets)/sizeof(int)) size_offsets = n;          if (n > size_offsets_max)
1095              {
1096              size_offsets_max = n;
1097              free(offsets);
1098              use_offsets = offsets = malloc(size_offsets_max * sizeof(int));
1099              if (offsets == NULL)
1100                {
1101                printf("** Failed to get %d bytes of memory for offsets vector\n",
1102                  size_offsets_max * sizeof(int));
1103                return 1;
1104                }
1105              }
1106            use_size_offsets = n;
1107            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1108          continue;          continue;
1109    
1110          case 'Z':          case 'Z':
1111          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1112          continue;          continue;
1113          }          }
1114        *pp++ = c;        *q++ = c;
1115        }        }
1116      *pp = 0;      *q = 0;
1117      len = pp - dbuffer;      len = q - dbuffer;
1118    
1119      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1120      support timing. */      support timing or playing with the match limit or callout data. */
1121    
1122    #if !defined NOPOSIX
1123      if (posix || do_posix)      if (posix || do_posix)
1124        {        {
1125        int rc;        int rc;
1126        int eflags = 0;        int eflags = 0;
1127        regmatch_t pmatch[30];        regmatch_t *pmatch = NULL;
1128          if (use_size_offsets > 0)
1129            pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
1130        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1131        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1132    
1133        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         pmatch, eflags);  
1134    
1135        if (rc != 0)        if (rc != 0)
1136          {          {
# Line 696  for (;;) Line 1140  for (;;)
1140        else        else
1141          {          {
1142          size_t i;          size_t i;
1143          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1144            {            {
1145            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1146              {              {
1147              fprintf(outfile, "%2d: ", i);              fprintf(outfile, "%2d: ", (int)i);
1148              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1149                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1150              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1151                if (i == 0 && do_showrest)
1152                  {
1153                  fprintf(outfile, " 0+ ");
1154                  (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1155                    outfile);
1156                  fprintf(outfile, "\n");
1157                  }
1158              }              }
1159            }            }
1160          }          }
1161          free(pmatch);
1162        }        }
1163    
1164      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
1165    
1166      else      else
1167    #endif  /* !defined NOPOSIX */
1168    
1169        for (;; gmatched++)    /* Loop for /g or /G */
1170        {        {
1171        if (timeit)        if (timeit)
1172          {          {
1173          register int i;          register int i;
1174          clock_t time_taken;          clock_t time_taken;
1175          clock_t start_time = clock();          clock_t start_time = clock();
1176          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
1177            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,            count = pcre_exec(re, extra, (char *)bptr, len,
1178              size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1179          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1180          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1181            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1182                (double)CLOCKS_PER_SEC);
1183          }          }
1184    
1185        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        /* If find_match_limit is set, we want to do repeated matches with
1186          size_offsets);        varying limits in order to find the minimum value. */
1187    
1188          if (find_match_limit)
1189            {
1190            int min = 0;
1191            int mid = 64;
1192            int max = -1;
1193    
1194            if (extra == NULL)
1195              {
1196              extra = malloc(sizeof(pcre_extra));
1197              extra->flags = 0;
1198              }
1199            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1200    
1201            for (;;)
1202              {
1203              extra->match_limit = mid;
1204              count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1205                options | g_notempty, use_offsets, use_size_offsets);
1206              if (count == PCRE_ERROR_MATCHLIMIT)
1207                {
1208                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1209                min = mid;
1210                mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1211                }
1212              else if (count >= 0 || count == PCRE_ERROR_NOMATCH)
1213                {
1214                if (mid == min + 1)
1215                  {
1216                  fprintf(outfile, "Minimum match limit = %d\n", mid);
1217                  break;
1218                  }
1219                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1220                max = mid;
1221                mid = (min + mid)/2;
1222                }
1223              else break;    /* Some other error */
1224              }
1225    
1226            extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1227            }
1228    
1229          /* If callout_data is set, use the interface with additional data */
1230    
1231          else if (callout_data_set)
1232            {
1233            if (extra == NULL)
1234              {
1235              extra = malloc(sizeof(pcre_extra));
1236              extra->flags = 0;
1237              }
1238            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1239            extra->callout_data = (void *)callout_data;
1240            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1241              options | g_notempty, use_offsets, use_size_offsets);
1242            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1243            }
1244    
1245          /* The normal case is just to do the match once, with the default
1246          value of match_limit. */
1247    
1248          else count = pcre_exec(re, extra, (char *)bptr, len,
1249            start_offset, options | g_notempty, use_offsets, use_size_offsets);
1250    
1251        if (count == 0)        if (count == 0)
1252          {          {
1253          fprintf(outfile, "Matched, but too many substrings\n");          fprintf(outfile, "Matched, but too many substrings\n");
1254          count = size_offsets/2;          count = use_size_offsets/3;
1255          }          }
1256    
1257          /* Matched */
1258    
1259        if (count >= 0)        if (count >= 0)
1260          {          {
1261          int i;          int i;
1262          count *= 2;          for (i = 0; i < count * 2; i += 2)
         for (i = 0; i < count; i += 2)  
1263            {            {
1264            if (offsets[i] < 0)            if (use_offsets[i] < 0)
1265              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1266            else            else
1267              {              {
1268              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1269              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
1270                  use_offsets[i+1] - use_offsets[i], outfile);
1271              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1272                if (i == 0)
1273                  {
1274                  if (do_showrest)
1275                    {
1276                    fprintf(outfile, " 0+ ");
1277                    (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1278                      outfile);
1279                    fprintf(outfile, "\n");
1280                    }
1281                  }
1282                }
1283              }
1284    
1285            for (i = 0; i < 32; i++)
1286              {
1287              if ((copystrings & (1 << i)) != 0)
1288                {
1289                char copybuffer[16];
1290                int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1291                  i, copybuffer, sizeof(copybuffer));
1292                if (rc < 0)
1293                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1294                else
1295                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1296                }
1297              }
1298    
1299            for (i = 0; i < 32; i++)
1300              {
1301              if ((getstrings & (1 << i)) != 0)
1302                {
1303                const char *substring;
1304                int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1305                  i, &substring);
1306                if (rc < 0)
1307                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
1308                else
1309                  {
1310                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1311                  /* free((void *)substring); */
1312                  pcre_free_substring(substring);
1313                  }
1314                }
1315              }
1316    
1317            if (getlist)
1318              {
1319              const char **stringlist;
1320              int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1321                &stringlist);
1322              if (rc < 0)
1323                fprintf(outfile, "get substring list failed %d\n", rc);
1324              else
1325                {
1326                for (i = 0; i < count; i++)
1327                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1328                if (stringlist[i] != NULL)
1329                  fprintf(outfile, "string list not terminated by NULL\n");
1330                /* free((void *)stringlist); */
1331                pcre_free_substring_list(stringlist);
1332              }              }
1333            }            }
1334          }          }
1335    
1336          /* Failed to match. If this is a /g or /G loop and we previously set
1337          g_notempty after a null match, this is not necessarily the end.
1338          We want to advance the start offset, and continue. Fudge the offset
1339          values to achieve this. We won't be at the end of the string - that
1340          was checked before setting g_notempty. */
1341    
1342        else        else
1343          {          {
1344          if (count == -1) fprintf(outfile, "No match\n");          if (g_notempty != 0)
1345            else fprintf(outfile, "Error %d\n", count);            {
1346              use_offsets[0] = start_offset;
1347              use_offsets[1] = start_offset + 1;
1348              }
1349            else
1350              {
1351              if (gmatched == 0)   /* Error if no previous matches */
1352                {
1353                if (count == -1) fprintf(outfile, "No match\n");
1354                  else fprintf(outfile, "Error %d\n", count);
1355                }
1356              break;  /* Out of the /g loop */
1357              }
1358          }          }
       }  
     }  
1359    
1360          /* If not /g or /G we are done */
1361    
1362          if (!do_g && !do_G) break;
1363    
1364          /* If we have matched an empty string, first check to see if we are at
1365          the end of the subject. If so, the /g loop is over. Otherwise, mimic
1366          what Perl's /g options does. This turns out to be rather cunning. First
1367          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1368          same point. If this fails (picked up above) we advance to the next
1369          character. */
1370    
1371          g_notempty = 0;
1372          if (use_offsets[0] == use_offsets[1])
1373            {
1374            if (use_offsets[0] == len) break;
1375            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1376            }
1377    
1378          /* For /g, update the start offset, leaving the rest alone */
1379    
1380          if (do_g) start_offset = use_offsets[1];
1381    
1382          /* For /G, update the pointer and length */
1383    
1384          else
1385            {
1386            bptr += use_offsets[1];
1387            len -= use_offsets[1];
1388            }
1389          }  /* End of loop for /g and /G */
1390        }    /* End of loop for data lines */
1391    
1392      CONTINUE:
1393    
1394    #if !defined NOPOSIX
1395    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1396    #endif
1397    
1398    if (re != NULL) free(re);    if (re != NULL) free(re);
1399    if (extra != NULL) free(extra);    if (extra != NULL) free(extra);
1400      if (tables != NULL)
1401        {
1402        free((void *)tables);
1403        setlocale(LC_CTYPE, "C");
1404        }
1405    }    }
1406    
 END_OFF:  
1407  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1408  return 0;  return 0;
1409  }  }

Legend:
Removed from v.7  
changed lines
  Added in v.67

  ViewVC Help
Powered by ViewVC 1.1.5