/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 45 by nigel, Sat Feb 24 21:39:25 2007 UTC revision 73 by nigel, Sat Feb 24 21:40:30 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather untidy in places. */
8    
9  #include <ctype.h>  #include <ctype.h>
10  #include <stdio.h>  #include <stdio.h>
11  #include <string.h>  #include <string.h>
# Line 9  Line 13 
13  #include <time.h>  #include <time.h>
14  #include <locale.h>  #include <locale.h>
15    
16  /* Use the internal info for displaying the results of pcre_study(). */  /* We need the internal info for displaying the results of pcre_study(). Also
17    for getting the opcodes for showing compiled code. */
18    
19    #define PCRE_SPY        /* For Win32 build, import data, not export */
20  #include "internal.h"  #include "internal.h"
21    
22  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
# Line 29  Makefile. */ Line 35  Makefile. */
35  #endif  #endif
36  #endif  #endif
37    
38  #define LOOPREPEAT 20000  #define LOOPREPEAT 50000
39    
40    #define BUFFER_SIZE 30000
41    #define DBUFFER_SIZE BUFFER_SIZE
42    
43    
44  static FILE *outfile;  static FILE *outfile;
45  static int log_store = 0;  static int log_store = 0;
46    static int callout_count;
47    static int callout_extra;
48    static int callout_fail_count;
49    static int callout_fail_id;
50    static int first_callout;
51    static int show_malloc;
52    static int use_utf8;
53  static size_t gotten_store;  static size_t gotten_store;
54    
55    
56    static const int utf8_table1[] = {
57      0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
58    
59    static const int utf8_table2[] = {
60      0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
61    
62    static const int utf8_table3[] = {
63      0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
64    
65    
66    
67    /*************************************************
68    *         Print compiled regex                   *
69    *************************************************/
70    
71    /* The code for doing this is held in a separate file that is also included in
72    pcre.c when it is compiled with the debug switch. It defines a function called
73    print_internals(), which uses a table of opcode lengths. That table must be
74    named OP_lengths; its contents are supplied by the macro OP_LENGTHS. */
75    
76    static uschar OP_lengths[] = { OP_LENGTHS };
77    
78    #include "printint.c"
79    
80    
81    
82    /*************************************************
83    *          Read number from string               *
84    *************************************************/
85    
86    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
87    around with conditional compilation, just do the job by hand. It is only used
88    for unpicking the -o argument, so just keep it simple.
89    
90    Arguments:
91      str           string to be converted
92      endptr        where to put the end pointer
93    
94    Returns:        the converted value, as an int
95    */
96    
97    static int
98    get_value(unsigned char *str, unsigned char **endptr)
99    {
100    int result = 0;
101    while(*str != 0 && isspace(*str)) str++;
102    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
103    *endptr = str;
104    return(result);
105    }
106    
107    
 /* Debugging function to print the internal form of the regex. This is the same  
 code as contained in pcre.c under the DEBUG macro. */  
108    
109  static const char *OP_names[] = {  /*************************************************
110    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  *       Convert character value to UTF-8         *
111    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  *************************************************/
112    "Opt", "^", "$", "Any", "chars", "not",  
113    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  /* This function takes an integer value in the range 0 - 0x7fffffff
114    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  and encodes it as a UTF-8 character in 0 to 6 bytes.
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref", "Recurse",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Bra"  
 };  
115    
116    Arguments:
117      cvalue     the character value
118      buffer     pointer to buffer for result - at least 6 bytes long
119    
120    Returns:     number of characters placed in the buffer
121                 -1 if input character is negative
122                 0 if input character is positive but too big (only when
123                 int is longer than 32 bits)
124    */
125    
126  static void print_internals(pcre *re)  static int
127    ord2utf8(int cvalue, unsigned char *buffer)
128  {  {
129  unsigned char *code = ((real_pcre *)re)->code;  register int i, j;
130    for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
131      if (cvalue <= utf8_table1[i]) break;
132    if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
133    if (cvalue < 0) return -1;
134    
135    buffer += i;
136    for (j = i; j > 0; j--)
137     {
138     *buffer-- = 0x80 | (cvalue & 0x3f);
139     cvalue >>= 6;
140     }
141    *buffer = utf8_table2[i] | cvalue;
142    return i + 1;
143    }
144    
145    
146    /*************************************************
147    *            Convert UTF-8 string to value       *
148    *************************************************/
149    
150    /* This function takes one or more bytes that represent a UTF-8 character,
151    and returns the value of the character.
152    
153  fprintf(outfile, "------------------------------------------------------------------\n");  Argument:
154      buffer   a pointer to the byte vector
155      vptr     a pointer to an int to receive the value
156    
157    Returns:   >  0 => the number of bytes consumed
158               -6 to 0 => malformed UTF-8 character at offset = (-return)
159    */
160    
161  for(;;)  static int
162    utf82ord(unsigned char *buffer, int *vptr)
163    {
164    int c = *buffer++;
165    int d = c;
166    int i, j, s;
167    
168    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
169    {    {
170    int c;    if ((d & 0x80) == 0) break;
171    int charlength;    d <<= 1;
172      }
173    
174    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  if (i == -1) { *vptr = c; return 1; }  /* ascii character */
175    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
176    
177    if (*code >= OP_BRA)  /* i now has a value in the range 1-5 */
     {  
     fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
178    
179    else switch(*code)  s = 6*i;
180      {  d = (c & utf8_table3[i]) << s;
     case OP_END:  
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
181    
182      case OP_CLASS:  for (j = 0; j < i; j++)
183        {    {
184        int i, min, max;    c = *buffer++;
185        code++;    if ((c & 0xc0) != 0x80) return -(j+1);
186        fprintf(outfile, "    [");    s -= 6;
187      d |= (c & 0x3f) << s;
188        for (i = 0; i < 256; i++)    }
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
189    
190        CLASS_REF_REPEAT:  /* Check that encoding was the correct unique one */
191    
192        switch(*code)  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
193          {    if (d <= utf8_table1[j]) break;
194          case OP_CRSTAR:  if (j != i) return -(i+1);
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
195    
196          case OP_CRRANGE:  /* Valid value */
197          case OP_CRMINRANGE:  
198          min = (code[1] << 8) + code[2];  *vptr = d;
199          max = (code[3] << 8) + code[4];  return i+1;
200          if (max == 0) fprintf(outfile, "{%d,}", min);  }
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
201    
202          default:  
203          code--;  
204    /*************************************************
205    *             Print character string             *
206    *************************************************/
207    
208    /* Character string printing function. Must handle UTF-8 strings in utf8
209    mode. Yields number of characters printed. If handed a NULL file, just counts
210    chars without printing. */
211    
212    static int pchars(unsigned char *p, int length, FILE *f)
213    {
214    int c;
215    int yield = 0;
216    
217    while (length-- > 0)
218      {
219      if (use_utf8)
220        {
221        int rc = utf82ord(p, &c);
222    
223        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
224          {
225          length -= rc - 1;
226          p += rc;
227          if (c < 256 && isprint(c))
228            {
229            if (f != NULL) fprintf(f, "%c", c);
230            yield++;
231            }
232          else
233            {
234            int n;
235            if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
236            yield += n;
237          }          }
238          continue;
239        }        }
240      break;      }
241    
242      /* Anything else is just a one-node item */     /* Not UTF-8, or malformed UTF-8  */
243    
244      default:    if (isprint(c = *(p++)))
245      fprintf(outfile, "    %s", OP_names[*code]);      {
246      break;      if (f != NULL) fprintf(f, "%c", c);
247        yield++;
248        }
249      else
250        {
251        if (f != NULL) fprintf(f, "\\x%02x", c);
252        yield += 4;
253      }      }
   
   code++;  
   fprintf(outfile, "\n");  
254    }    }
255    
256    return yield;
257  }  }
258    
259    
260    
261  /* Character string printing function. */  /*************************************************
262    *              Callout function                  *
263    *************************************************/
264    
265    /* Called from PCRE as a result of the (?C) item. We print out where we are in
266    the match. Yield zero unless more callouts than the fail count, or the callout
267    data is not zero. */
268    
269  static void pchars(unsigned char *p, int length)  static int callout(pcre_callout_block *cb)
270  {  {
271  int c;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
272  while (length-- > 0)  int i, pre_start, post_start;
273    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  
274      else fprintf(outfile, "\\x%02x", c);  if (callout_extra)
275      {
276      fprintf(f, "Callout %d: last capture = %d\n",
277        cb->callout_number, cb->capture_last);
278    
279      for (i = 0; i < cb->capture_top * 2; i += 2)
280        {
281        if (cb->offset_vector[i] < 0)
282          fprintf(f, "%2d: <unset>\n", i/2);
283        else
284          {
285          fprintf(f, "%2d: ", i/2);
286          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
287            cb->offset_vector[i+1] - cb->offset_vector[i], f);
288          fprintf(f, "\n");
289          }
290        }
291      }
292    
293    /* Re-print the subject in canonical form, the first time or if giving full
294    details. On subsequent calls in the same match, we use pchars just to find the
295    printed lengths of the substrings. */
296    
297    if (f != NULL) fprintf(f, "--->");
298    
299    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
300    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
301      cb->current_position - cb->start_match, f);
302    
303    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
304      cb->subject_length - cb->current_position, f);
305    
306    if (f != NULL) fprintf(f, "\n");
307    
308    /* Always print appropriate indicators, with callout number if not already
309    shown */
310    
311    if (callout_extra) fprintf(outfile, "    ");
312      else fprintf(outfile, "%3d ", cb->callout_number);
313    
314    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
315    fprintf(outfile, "^");
316    
317    if (post_start > 0)
318      {
319      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
320      fprintf(outfile, "^");
321      }
322    
323    fprintf(outfile, "\n");
324    first_callout = 0;
325    
326    if (cb->callout_data != NULL)
327      {
328      int callout_data = *((int *)(cb->callout_data));
329      if (callout_data != 0)
330        {
331        fprintf(outfile, "Callout data = %d\n", callout_data);
332        return callout_data;
333        }
334      }
335    
336    return (cb->callout_number != callout_fail_id)? 0 :
337           (++callout_count >= callout_fail_count)? 1 : 0;
338  }  }
339    
340    
341    /*************************************************
342    *            Local malloc functions              *
343    *************************************************/
344    
345  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
346  compiled re. */  compiled re. */
347    
348  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
349  {  {
350    void *block = malloc(size);
351  gotten_store = size;  gotten_store = size;
352  if (log_store)  if (show_malloc)
353    fprintf(outfile, "Memory allocation (code space): %d\n",    fprintf(outfile, "malloc       %3d %p\n", size, block);
354      (int)((int)size - offsetof(real_pcre, code[0])));  return block;
355  return malloc(size);  }
356    
357    static void new_free(void *block)
358    {
359    if (show_malloc)
360      fprintf(outfile, "free             %p\n", block);
361    free(block);
362    }
363    
364    
365    /* For recursion malloc/free, to test stacking calls */
366    
367    static void *stack_malloc(size_t size)
368    {
369    void *block = malloc(size);
370    if (show_malloc)
371      fprintf(outfile, "stack_malloc %3d %p\n", size, block);
372    return block;
373  }  }
374    
375    static void stack_free(void *block)
376    {
377    if (show_malloc)
378      fprintf(outfile, "stack_free       %p\n", block);
379    free(block);
380    }
381    
382    
383    /*************************************************
384    *          Call pcre_fullinfo()                  *
385    *************************************************/
386    
387  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function */
388    
# Line 303  if ((rc = pcre_fullinfo(re, study, optio Line 395  if ((rc = pcre_fullinfo(re, study, optio
395    
396    
397    
398    /*************************************************
399    *                Main Program                    *
400    *************************************************/
401    
402  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
403  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 317  int op = 1; Line 412  int op = 1;
412  int timeit = 0;  int timeit = 0;
413  int showinfo = 0;  int showinfo = 0;
414  int showstore = 0;  int showstore = 0;
415    int size_offsets = 45;
416    int size_offsets_max;
417    int *offsets;
418    #if !defined NOPOSIX
419  int posix = 0;  int posix = 0;
420    #endif
421  int debug = 0;  int debug = 0;
422  int done = 0;  int done = 0;
423  unsigned char buffer[30000];  
424  unsigned char dbuffer[1024];  unsigned char *buffer;
425    unsigned char *dbuffer;
426    
427    /* Get buffers from malloc() so that Electric Fence will check their misuse
428    when I am debugging. */
429    
430    buffer = (unsigned char *)malloc(BUFFER_SIZE);
431    dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
432    
433  /* Static so that new_malloc can use it. */  /* Static so that new_malloc can use it. */
434    
# Line 331  outfile = stdout; Line 438  outfile = stdout;
438    
439  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
440    {    {
441      unsigned char *endptr;
442    
443    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
444      showstore = 1;      showstore = 1;
445    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
446    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
447    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
448      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
449          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
450            *endptr == 0))
451        {
452        op++;
453        argc--;
454        }
455    #if !defined NOPOSIX
456    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
457    #endif
458      else if (strcmp(argv[op], "-C") == 0)
459        {
460        int rc;
461        printf("PCRE version %s\n", pcre_version());
462        printf("Compiled with\n");
463        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
464        printf("  %sUTF-8 support\n", rc? "" : "No ");
465        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
466        printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
467        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
468        printf("  Internal link size = %d\n", rc);
469        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
470        printf("  POSIX malloc threshold = %d\n", rc);
471        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
472        printf("  Default match limit = %d\n", rc);
473        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
474        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
475        exit(0);
476        }
477    else    else
478      {      {
479      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
480      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
481      printf("  -d   debug: show compiled code; implies -i\n"      printf("  -C     show PCRE compile-time options and exit\n");
482             "  -i   show information about compiled pattern\n"      printf("  -d     debug: show compiled code; implies -i\n"
483             "  -p   use POSIX interface\n"             "  -i     show information about compiled pattern\n"
484             "  -s   output store information\n"             "  -o <n> set size of offsets vector to <n>\n");
485             "  -t   time compilation and execution\n");  #if !defined NOPOSIX
486        printf("  -p     use POSIX interface\n");
487    #endif
488        printf("  -s     output store information\n"
489               "  -t     time compilation and execution\n");
490      return 1;      return 1;
491      }      }
492    op++;    op++;
493    argc--;    argc--;
494    }    }
495    
496    /* Get the store for the offsets vector, and remember what it was */
497    
498    size_offsets_max = size_offsets;
499    offsets = (int *)malloc(size_offsets_max * sizeof(int));
500    if (offsets == NULL)
501      {
502      printf("** Failed to get %d bytes of memory for offsets vector\n",
503        size_offsets_max * sizeof(int));
504      return 1;
505      }
506    
507  /* Sort out the input and output files */  /* Sort out the input and output files */
508    
509  if (argc > 1)  if (argc > 1)
# Line 377  if (argc > 2) Line 529  if (argc > 2)
529  /* Set alternative malloc function */  /* Set alternative malloc function */
530    
531  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
532    pcre_free = new_free;
533    pcre_stack_malloc = stack_malloc;
534    pcre_stack_free = stack_free;
535    
536  /* Heading line, then prompt for first regex if stdin */  /* Heading line, then prompt for first regex if stdin */
537    
# Line 396  while (!done) Line 551  while (!done)
551    
552    const char *error;    const char *error;
553    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
554    unsigned const char *tables = NULL;    const unsigned char *tables = NULL;
555    int do_study = 0;    int do_study = 0;
556    int do_debug = debug;    int do_debug = debug;
557    int do_G = 0;    int do_G = 0;
# Line 405  while (!done) Line 560  while (!done)
560    int do_showrest = 0;    int do_showrest = 0;
561    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
562    
563      use_utf8 = 0;
564    
565    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
566    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
567    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
568      fflush(outfile);
569    
570    p = buffer;    p = buffer;
571    while (isspace(*p)) p++;    while (isspace(*p)) p++;
# Line 436  while (!done) Line 594  while (!done)
594        }        }
595      if (*pp != 0) break;      if (*pp != 0) break;
596    
597      len = sizeof(buffer) - (pp - buffer);      len = BUFFER_SIZE - (pp - buffer);
598      if (len < 256)      if (len < 256)
599        {        {
600        fprintf(outfile, "** Expression too long - missing delimiter?\n");        fprintf(outfile, "** Expression too long - missing delimiter?\n");
# Line 486  while (!done) Line 644  while (!done)
644        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
645        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
646        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
647          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
648    
649  #if !defined NOPOSIX  #if !defined NOPOSIX
650        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
# Line 494  while (!done) Line 653  while (!done)
653        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
654        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
655        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
656          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
657          case '?': options |= PCRE_NO_UTF8_CHECK; break;
658    
659        case 'L':        case 'L':
660        ppp = pp;        ppp = pp;
# Line 533  while (!done) Line 694  while (!done)
694    
695      if (rc != 0)      if (rc != 0)
696        {        {
697        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
698        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
699        goto SKIP_DATA;        goto SKIP_DATA;
700        }        }
# Line 557  while (!done) Line 718  while (!done)
718          }          }
719        time_taken = clock() - start_time;        time_taken = clock() - start_time;
720        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
721          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
722          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
723        }        }
724    
725      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 574  while (!done) Line 735  while (!done)
735          {          {
736          for (;;)          for (;;)
737            {            {
738            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
739              {              {
740              done = 1;              done = 1;
741              goto CONTINUE;              goto CONTINUE;
# Line 592  while (!done) Line 753  while (!done)
753      info-returning functions. The old one has a limited interface and      info-returning functions. The old one has a limited interface and
754      returns only limited data. Check that it agrees with the newer one. */      returns only limited data. Check that it agrees with the newer one. */
755    
756        if (log_store)
757          fprintf(outfile, "Memory allocation (code space): %d\n",
758            (int)(gotten_store -
759                  sizeof(real_pcre) -
760                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
761    
762      if (do_showinfo)      if (do_showinfo)
763        {        {
764          unsigned long int get_options;
765        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
766        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char;
767          int nameentrysize, namecount;
768          const uschar *nametable;
769        size_t size;        size_t size;
770    
771        if (do_debug) print_internals(re);        if (do_debug)
772            {
773            fprintf(outfile, "------------------------------------------------------------------\n");
774            print_internals(re, outfile);
775            }
776    
777        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
778        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
779        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
780        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
781        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
782        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
783          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
784          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
785          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
786    
787        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
788        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
# Line 620  while (!done) Line 797  while (!done)
797            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
798              first_char, old_first_char);              first_char, old_first_char);
799    
800          if (old_options != options) fprintf(outfile,          if (old_options != (int)get_options) fprintf(outfile,
801            "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
802              old_options);              get_options, old_options);
803          }          }
804    
805        if (size != gotten_store) fprintf(outfile,        if (size != gotten_store) fprintf(outfile,
# Line 632  while (!done) Line 809  while (!done)
809        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
810        if (backrefmax > 0)        if (backrefmax > 0)
811          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
812        if (options == 0) fprintf(outfile, "No options\n");  
813          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",        if (namecount > 0)
814            ((options & PCRE_ANCHORED) != 0)? " anchored" : "",          {
815            ((options & PCRE_CASELESS) != 0)? " caseless" : "",          fprintf(outfile, "Named capturing subpatterns:\n");
816            ((options & PCRE_EXTENDED) != 0)? " extended" : "",          while (namecount-- > 0)
817            ((options & PCRE_MULTILINE) != 0)? " multiline" : "",            {
818            ((options & PCRE_DOTALL) != 0)? " dotall" : "",            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
819            ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
820            ((options & PCRE_EXTRA) != 0)? " extra" : "",              GET2(nametable, 0));
821            ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");            nametable += nameentrysize;
822              }
823            }
824    
825          if (get_options == 0) fprintf(outfile, "No options\n");
826            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",
827              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
828              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
829              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
830              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
831              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
832              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
833              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
834              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
835              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
836              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
837    
838        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
839          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Case state changes\n");
# Line 656  while (!done) Line 848  while (!done)
848          }          }
849        else        else
850          {          {
851          if (isprint(first_char))          int ch = first_char & 255;
852            fprintf(outfile, "First char = \'%c\'\n", first_char);          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
853              "" : " (caseless)";
854            if (isprint(ch))
855              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
856          else          else
857            fprintf(outfile, "First char = %d\n", first_char);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
858          }          }
859    
860        if (need_char < 0)        if (need_char < 0)
# Line 668  while (!done) Line 863  while (!done)
863          }          }
864        else        else
865          {          {
866          if (isprint(need_char))          int ch = need_char & 255;
867            fprintf(outfile, "Need char = \'%c\'\n", need_char);          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
868              "" : " (caseless)";
869            if (isprint(ch))
870              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
871          else          else
872            fprintf(outfile, "Need char = %d\n", need_char);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
873          }          }
874        }        }
875    
# Line 690  while (!done) Line 888  while (!done)
888          time_taken = clock() - start_time;          time_taken = clock() - start_time;
889          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
890          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
891            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
892            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
893          }          }
894    
895        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 700  while (!done) Line 898  while (!done)
898        else if (extra == NULL)        else if (extra == NULL)
899          fprintf(outfile, "Study returned NULL\n");          fprintf(outfile, "Study returned NULL\n");
900    
901          /* Don't output study size; at present it is in any case a fixed
902          value, but it varies, depending on the computer architecture, and
903          so messes up the test suite. */
904    
905        else if (do_showinfo)        else if (do_showinfo)
906          {          {
907            size_t size;
908          uschar *start_bits = NULL;          uschar *start_bits = NULL;
909            new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);
910          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
911            /* fprintf(outfile, "Study size = %d\n", size); */
912          if (start_bits == NULL)          if (start_bits == NULL)
913            fprintf(outfile, "No starting character set\n");            fprintf(outfile, "No starting character set\n");
914          else          else
# Line 744  while (!done) Line 949  while (!done)
949      {      {
950      unsigned char *q;      unsigned char *q;
951      unsigned char *bptr = dbuffer;      unsigned char *bptr = dbuffer;
952        int *use_offsets = offsets;
953        int use_size_offsets = size_offsets;
954        int callout_data = 0;
955        int callout_data_set = 0;
956      int count, c;      int count, c;
957      int copystrings = 0;      int copystrings = 0;
958        int find_match_limit = 0;
959      int getstrings = 0;      int getstrings = 0;
960      int getlist = 0;      int getlist = 0;
961      int gmatched = 0;      int gmatched = 0;
962      int start_offset = 0;      int start_offset = 0;
963      int g_notempty = 0;      int g_notempty = 0;
     int offsets[45];  
     int size_offsets = sizeof(offsets)/sizeof(int);  
964    
965      options = 0;      options = 0;
966    
967        pcre_callout = callout;
968        first_callout = 1;
969        callout_extra = 0;
970        callout_count = 0;
971        callout_fail_count = 999999;
972        callout_fail_id = -1;
973        show_malloc = 0;
974    
975      if (infile == stdin) printf("data> ");      if (infile == stdin) printf("data> ");
976      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
977        {        {
978        done = 1;        done = 1;
979        goto CONTINUE;        goto CONTINUE;
# Line 777  while (!done) Line 993  while (!done)
993        {        {
994        int i = 0;        int i = 0;
995        int n = 0;        int n = 0;
996    
997        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
998          {          {
999          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 796  while (!done) Line 1013  while (!done)
1013          break;          break;
1014    
1015          case 'x':          case 'x':
1016    
1017            /* Handle \x{..} specially - new Perl thing for utf8 */
1018    
1019            if (*p == '{')
1020              {
1021              unsigned char *pt = p;
1022              c = 0;
1023              while (isxdigit(*(++pt)))
1024                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1025              if (*pt == '}')
1026                {
1027                unsigned char buff8[8];
1028                int ii, utn;
1029                utn = ord2utf8(c, buff8);
1030                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1031                c = buff8[ii];   /* Last byte */
1032                p = pt + 1;
1033                break;
1034                }
1035              /* Not correct form; fall through */
1036              }
1037    
1038            /* Ordinary \x */
1039    
1040          c = 0;          c = 0;
1041          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1042            {            {
# Line 817  while (!done) Line 1058  while (!done)
1058          continue;          continue;
1059    
1060          case 'C':          case 'C':
1061          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1062          copystrings |= 1 << n;            {
1063              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1064              copystrings |= 1 << n;
1065              }
1066            else if (isalnum(*p))
1067              {
1068              uschar name[256];
1069              uschar *npp = name;
1070              while (isalnum(*p)) *npp++ = *p++;
1071              *npp = 0;
1072              n = pcre_get_stringnumber(re, (char *)name);
1073              if (n < 0)
1074                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1075              else copystrings |= 1 << n;
1076              }
1077            else if (*p == '+')
1078              {
1079              callout_extra = 1;
1080              p++;
1081              }
1082            else if (*p == '-')
1083              {
1084              pcre_callout = NULL;
1085              p++;
1086              }
1087            else if (*p == '!')
1088              {
1089              callout_fail_id = 0;
1090              p++;
1091              while(isdigit(*p))
1092                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1093              callout_fail_count = 0;
1094              if (*p == '!')
1095                {
1096                p++;
1097                while(isdigit(*p))
1098                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1099                }
1100              }
1101            else if (*p == '*')
1102              {
1103              int sign = 1;
1104              callout_data = 0;
1105              if (*(++p) == '-') { sign = -1; p++; }
1106              while(isdigit(*p))
1107                callout_data = callout_data * 10 + *p++ - '0';
1108              callout_data *= sign;
1109              callout_data_set = 1;
1110              }
1111          continue;          continue;
1112    
1113          case 'G':          case 'G':
1114          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1115          getstrings |= 1 << n;            {
1116              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1117              getstrings |= 1 << n;
1118              }
1119            else if (isalnum(*p))
1120              {
1121              uschar name[256];
1122              uschar *npp = name;
1123              while (isalnum(*p)) *npp++ = *p++;
1124              *npp = 0;
1125              n = pcre_get_stringnumber(re, (char *)name);
1126              if (n < 0)
1127                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1128              else getstrings |= 1 << n;
1129              }
1130          continue;          continue;
1131    
1132          case 'L':          case 'L':
1133          getlist = 1;          getlist = 1;
1134          continue;          continue;
1135    
1136            case 'M':
1137            find_match_limit = 1;
1138            continue;
1139    
1140          case 'N':          case 'N':
1141          options |= PCRE_NOTEMPTY;          options |= PCRE_NOTEMPTY;
1142          continue;          continue;
1143    
1144          case 'O':          case 'O':
1145          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1146          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1147              {
1148              size_offsets_max = n;
1149              free(offsets);
1150              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1151              if (offsets == NULL)
1152                {
1153                printf("** Failed to get %d bytes of memory for offsets vector\n",
1154                  size_offsets_max * sizeof(int));
1155                return 1;
1156                }
1157              }
1158            use_size_offsets = n;
1159            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1160            continue;
1161    
1162            case 'S':
1163            show_malloc = 1;
1164          continue;          continue;
1165    
1166          case 'Z':          case 'Z':
1167          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1168          continue;          continue;
1169    
1170            case '?':
1171            options |= PCRE_NO_UTF8_CHECK;
1172            continue;
1173          }          }
1174        *q++ = c;        *q++ = c;
1175        }        }
# Line 849  while (!done) Line 1177  while (!done)
1177      len = q - dbuffer;      len = q - dbuffer;
1178    
1179      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1180      support timing. */      support timing or playing with the match limit or callout data. */
1181    
1182  #if !defined NOPOSIX  #if !defined NOPOSIX
1183      if (posix || do_posix)      if (posix || do_posix)
1184        {        {
1185        int rc;        int rc;
1186        int eflags = 0;        int eflags = 0;
1187        regmatch_t pmatch[sizeof(offsets)/sizeof(int)];        regmatch_t *pmatch = NULL;
1188          if (use_size_offsets > 0)
1189            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1190        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1191        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1192    
1193        rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1194    
1195        if (rc != 0)        if (rc != 0)
1196          {          {
1197          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1198          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1199          }          }
1200        else        else
1201          {          {
1202          size_t i;          size_t i;
1203          for (i = 0; i < size_offsets; i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1204            {            {
1205            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1206              {              {
1207              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1208              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1209                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1210              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1211              if (i == 0 && do_showrest)              if (i == 0 && do_showrest)
1212                {                {
1213                fprintf(outfile, " 0+ ");                fprintf(outfile, " 0+ ");
1214                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1215                    outfile);
1216                fprintf(outfile, "\n");                fprintf(outfile, "\n");
1217                }                }
1218              }              }
1219            }            }
1220          }          }
1221          free(pmatch);
1222        }        }
1223    
1224      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
# Line 903  while (!done) Line 1235  while (!done)
1235          clock_t start_time = clock();          clock_t start_time = clock();
1236          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1237            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1238              start_offset, options | g_notempty, offsets, size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1239          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1240          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1241            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1242            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
1243            }
1244    
1245          /* If find_match_limit is set, we want to do repeated matches with
1246          varying limits in order to find the minimum value. */
1247    
1248          if (find_match_limit)
1249            {
1250            int min = 0;
1251            int mid = 64;
1252            int max = -1;
1253    
1254            if (extra == NULL)
1255              {
1256              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1257              extra->flags = 0;
1258              }
1259            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1260    
1261            for (;;)
1262              {
1263              extra->match_limit = mid;
1264              count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1265                options | g_notempty, use_offsets, use_size_offsets);
1266              if (count == PCRE_ERROR_MATCHLIMIT)
1267                {
1268                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1269                min = mid;
1270                mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1271                }
1272              else if (count >= 0 || count == PCRE_ERROR_NOMATCH)
1273                {
1274                if (mid == min + 1)
1275                  {
1276                  fprintf(outfile, "Minimum match limit = %d\n", mid);
1277                  break;
1278                  }
1279                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1280                max = mid;
1281                mid = (min + mid)/2;
1282                }
1283              else break;    /* Some other error */
1284              }
1285    
1286            extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1287            }
1288    
1289          /* If callout_data is set, use the interface with additional data */
1290    
1291          else if (callout_data_set)
1292            {
1293            if (extra == NULL)
1294              {
1295              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1296              extra->flags = 0;
1297              }
1298            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1299            extra->callout_data = &callout_data;
1300            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1301              options | g_notempty, use_offsets, use_size_offsets);
1302            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1303          }          }
1304    
1305        count = pcre_exec(re, extra, (char *)bptr, len,        /* The normal case is just to do the match once, with the default
1306          start_offset, options | g_notempty, offsets, size_offsets);        value of match_limit. */
1307    
1308          else count = pcre_exec(re, extra, (char *)bptr, len,
1309            start_offset, options | g_notempty, use_offsets, use_size_offsets);
1310    
1311        if (count == 0)        if (count == 0)
1312          {          {
1313          fprintf(outfile, "Matched, but too many substrings\n");          fprintf(outfile, "Matched, but too many substrings\n");
1314          count = size_offsets/3;          count = use_size_offsets/3;
1315          }          }
1316    
1317        /* Matched */        /* Matched */
# Line 926  while (!done) Line 1321  while (!done)
1321          int i;          int i;
1322          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
1323            {            {
1324            if (offsets[i] < 0)            if (use_offsets[i] < 0)
1325              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1326            else            else
1327              {              {
1328              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1329              pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
1330                  use_offsets[i+1] - use_offsets[i], outfile);
1331              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1332              if (i == 0)              if (i == 0)
1333                {                {
1334                if (do_showrest)                if (do_showrest)
1335                  {                  {
1336                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
1337                  pchars(bptr + offsets[i+1], len - offsets[i+1]);                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1338                      outfile);
1339                  fprintf(outfile, "\n");                  fprintf(outfile, "\n");
1340                  }                  }
1341                }                }
# Line 950  while (!done) Line 1347  while (!done)
1347            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
1348              {              {
1349              char copybuffer[16];              char copybuffer[16];
1350              int rc = pcre_copy_substring((char *)bptr, offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1351                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
1352              if (rc < 0)              if (rc < 0)
1353                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
# Line 964  while (!done) Line 1361  while (!done)
1361            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
1362              {              {
1363              const char *substring;              const char *substring;
1364              int rc = pcre_get_substring((char *)bptr, offsets, count,              int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1365                i, &substring);                i, &substring);
1366              if (rc < 0)              if (rc < 0)
1367                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
1368              else              else
1369                {                {
1370                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1371                free((void *)substring);                /* free((void *)substring); */
1372                  pcre_free_substring(substring);
1373                }                }
1374              }              }
1375            }            }
# Line 979  while (!done) Line 1377  while (!done)
1377          if (getlist)          if (getlist)
1378            {            {
1379            const char **stringlist;            const char **stringlist;
1380            int rc = pcre_get_substring_list((char *)bptr, offsets, count,            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1381              &stringlist);              &stringlist);
1382            if (rc < 0)            if (rc < 0)
1383              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
# Line 989  while (!done) Line 1387  while (!done)
1387                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1388              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
1389                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
1390              free((void *)stringlist);              /* free((void *)stringlist); */
1391                pcre_free_substring_list(stringlist);
1392              }              }
1393            }            }
1394          }          }
1395    
1396        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
1397        PCRE_NOTEMPTY after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end.
1398        We want to advance the start offset, and continue. Fudge the offset        We want to advance the start offset, and continue. In the case of UTF-8
1399        values to achieve this. We won't be at the end of the string - that        matching, the advance must be one character, not one byte. Fudge the
1400        was checked before setting PCRE_NOTEMPTY. */        offset values to achieve this. We won't be at the end of the string -
1401          that was checked before setting g_notempty. */
1402    
1403        else        else
1404          {          {
1405          if (g_notempty != 0)          if (g_notempty != 0)
1406            {            {
1407            offsets[0] = start_offset;            int onechar = 1;
1408            offsets[1] = start_offset + 1;            use_offsets[0] = start_offset;
1409              if (use_utf8)
1410                {
1411                while (start_offset + onechar < len)
1412                  {
1413                  int tb = bptr[start_offset+onechar];
1414                  if (tb <= 127) break;
1415                  tb &= 0xc0;
1416                  if (tb != 0 && tb != 0xc0) onechar++;
1417                  }
1418                }
1419              use_offsets[1] = start_offset + onechar;
1420            }            }
1421          else          else
1422            {            {
1423            if (gmatched == 0)   /* Error if no previous matches */            if (count == PCRE_ERROR_NOMATCH)
1424              {              {
1425              if (count == -1) fprintf(outfile, "No match\n");              if (gmatched == 0) fprintf(outfile, "No match\n");
               else fprintf(outfile, "Error %d\n", count);  
1426              }              }
1427              else fprintf(outfile, "Error %d\n", count);
1428            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
1429            }            }
1430          }          }
# Line 1025  while (!done) Line 1436  while (!done)
1436        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
1437        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic
1438        what Perl's /g option does. This turns out to be rather cunning. First        what Perl's /g option does. This turns out to be rather cunning. First
1439        we set PCRE_NOTEMPTY and try the match again at the same point. If this        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1440        fails (picked up above) we advance to the next character. */        same point. If this fails (picked up above) we advance to the next
1441          character. */
1442    
1443        g_notempty = 0;        g_notempty = 0;
1444        if (offsets[0] == offsets[1])        if (use_offsets[0] == use_offsets[1])
1445          {          {
1446          if (offsets[0] == len) break;          if (use_offsets[0] == len) break;
1447          g_notempty = PCRE_NOTEMPTY;          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1448          }          }
1449    
1450        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
1451    
1452        if (do_g) start_offset = offsets[1];        if (do_g) start_offset = use_offsets[1];
1453    
1454        /* For /G, update the pointer and length */        /* For /G, update the pointer and length */
1455    
1456        else        else
1457          {          {
1458          bptr += offsets[1];          bptr += use_offsets[1];
1459          len -= offsets[1];          len -= use_offsets[1];
1460          }          }
1461        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
1462      }    /* End of loop for data lines */      }    /* End of loop for data lines */
# Line 1064  while (!done) Line 1476  while (!done)
1476      }      }
1477    }    }
1478    
1479  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
1480  return 0;  return 0;
1481  }  }
1482    

Legend:
Removed from v.45  
changed lines
  Added in v.73

  ViewVC Help
Powered by ViewVC 1.1.5